Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Hector Lopez
		
	committed on
		
		
					Commit 
							
							·
						
						c5b702e
	
1
								Parent(s):
							
							c6d3bd0
								
Upload application logic
Browse files- app.py +36 -0
- backend.py +44 -0
- requirements.txt +3 -0
- tweet_scraper.py +48 -0
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,36 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            """
         | 
| 2 | 
            +
            Gradio Twitter analyzer application.
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            This module provides a gradio-based web application
         | 
| 5 | 
            +
            for the Twitter analyzer project.
         | 
| 6 | 
            +
            """
         | 
| 7 | 
            +
            import gradio as gr
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            from tweet_scraper import retrieve_tweet_text
         | 
| 10 | 
            +
            from backend import predict_positivity
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            def process_tweet(url: str) -> str:
         | 
| 14 | 
            +
                """
         | 
| 15 | 
            +
                Get a tweet's positivity.
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                Args:
         | 
| 18 | 
            +
                    url (str): Tweet's URL.
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                Returns:
         | 
| 21 | 
            +
                    str: Predicted positivity
         | 
| 22 | 
            +
                """
         | 
| 23 | 
            +
                text = retrieve_tweet_text(url)
         | 
| 24 | 
            +
                outcome = predict_positivity(text)
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                return outcome
         | 
| 27 | 
            +
             | 
| 28 | 
            +
             | 
| 29 | 
            +
            app = gr.Interface(
         | 
| 30 | 
            +
                fn=process_tweet,
         | 
| 31 | 
            +
                inputs=gr.inputs.Textbox(lines=2, placeholder="Tweet url..."),
         | 
| 32 | 
            +
                outputs="text",
         | 
| 33 | 
            +
            )
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            if __name__ == "__main__":
         | 
| 36 | 
            +
                app, local_url, share_url = app.launch()
         | 
    	
        backend.py
    ADDED
    
    | @@ -0,0 +1,44 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            """
         | 
| 2 | 
            +
            Positivity predictor.
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            This module provides the functionality to predict
         | 
| 5 | 
            +
            a tweet's positivity using a BERT model.
         | 
| 6 | 
            +
            """
         | 
| 7 | 
            +
            import torch
         | 
| 8 | 
            +
            from transformers import BertForSequenceClassification, BertTokenizer
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
         | 
| 11 | 
            +
            model = BertForSequenceClassification.from_pretrained(
         | 
| 12 | 
            +
                "bert-base-uncased",
         | 
| 13 | 
            +
                num_labels=5,
         | 
| 14 | 
            +
                output_attentions=False,
         | 
| 15 | 
            +
                output_hidden_states=False,
         | 
| 16 | 
            +
                local_files_only=True,
         | 
| 17 | 
            +
            )
         | 
| 18 | 
            +
            model.load_state_dict(torch.load("data/BERT_ft_epoch5.model"))
         | 
| 19 | 
            +
            model.eval()
         | 
| 20 | 
            +
             | 
| 21 | 
            +
             | 
| 22 | 
            +
            def predict_positivity(text: str) -> str:
         | 
| 23 | 
            +
                """
         | 
| 24 | 
            +
                Predict the positivity of a given tweet.
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                Args:
         | 
| 27 | 
            +
                    text (str): Tweet's text.
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                Returns:
         | 
| 30 | 
            +
                    str: Predicted positivity.
         | 
| 31 | 
            +
                """
         | 
| 32 | 
            +
                label_dict = {
         | 
| 33 | 
            +
                    0: "Extremely Negative",
         | 
| 34 | 
            +
                    1: "Negative",
         | 
| 35 | 
            +
                    2: "Neutral",
         | 
| 36 | 
            +
                    3: "Positive",
         | 
| 37 | 
            +
                    4: "Extremely Positive",
         | 
| 38 | 
            +
                }
         | 
| 39 | 
            +
                encoded = tokenizer(text, return_tensors="pt")
         | 
| 40 | 
            +
                logits = model(**encoded).logits
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                predicted_class_id = logits.argmax().item()
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                return label_dict[predicted_class_id]
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            torch
         | 
| 2 | 
            +
            transformers
         | 
| 3 | 
            +
            gradio
         | 
    	
        tweet_scraper.py
    ADDED
    
    | @@ -0,0 +1,48 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            """
         | 
| 2 | 
            +
            Twitter scraper.
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            This module provides the functionality to retrieve
         | 
| 5 | 
            +
            a tweet's text given a tweet's URL.
         | 
| 6 | 
            +
            """
         | 
| 7 | 
            +
            import re
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            import requests
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            def retrieve_tweet_text(tweet_url: str) -> str:
         | 
| 13 | 
            +
                """
         | 
| 14 | 
            +
                Retrieve a tweet's text.
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                Args:
         | 
| 17 | 
            +
                    tweet_url (str): Tweet's URL.
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                Returns:
         | 
| 20 | 
            +
                    str: Tweet's parsed text.
         | 
| 21 | 
            +
                """
         | 
| 22 | 
            +
                # Get the url to retrieve tweet-related data
         | 
| 23 | 
            +
                url = (
         | 
| 24 | 
            +
                    "https://publish.twitter.com/oembed?dnt=true",
         | 
| 25 | 
            +
                    f"&omit_script=true&url={tweet_url}",
         | 
| 26 | 
            +
                )
         | 
| 27 | 
            +
                url = str.join("", url)
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                # Get the raw html containing the tweet text
         | 
| 30 | 
            +
                raw_html = requests.get(url).json()["html"]
         | 
| 31 | 
            +
                # Remove links from text
         | 
| 32 | 
            +
                pattern = r"<[a][^>]*>(.+?)</[a]>"
         | 
| 33 | 
            +
                html = re.sub(pattern, "", raw_html)
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                # Remove the HTML tags from the text
         | 
| 36 | 
            +
                text = [i.strip() for i in re.sub("<.*?>", "", html).splitlines() if i][0]
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                # If there is a picture, remove all the text after it
         | 
| 39 | 
            +
                if "pic" in text:
         | 
| 40 | 
            +
                    idx = text.index("pic")
         | 
| 41 | 
            +
                    text = text[:idx]
         | 
| 42 | 
            +
                # If there is no picture, the &mdash defines the tweet's
         | 
| 43 | 
            +
                # end.
         | 
| 44 | 
            +
                elif "&mdash" in text:
         | 
| 45 | 
            +
                    idx = text.index("&mdash")
         | 
| 46 | 
            +
                    text = text[:idx]
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                return text
         | 
