File size: 2,365 Bytes
9df4cc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from finnlp.data_sources.social_media._base import Social_Media_Downloader
from tqdm.notebook import tqdm
import pandas as pd
import finnhub
import time

class Finnhub_Sentiment(Social_Media_Downloader):
    def __init__(self, args = {}):
        super().__init__(args)
        assert "token" in args.keys(), "Please input your finnhub token. Avaliable at https://finnhub.io/dashboard"
        self.finnhub_client = finnhub.Client(api_key=args["token"])
        self.delay = args["delay"] if "dalay" in args.keys() else 0.7

    def download_sentiment(self, start_date, end_date, stock = "APPL"):
        self.reddit = pd.DataFrame()
        self.twitter = pd.DataFrame()
        self.date_list = pd.date_range(start_date,end_date)
        days_each_time = 4
        date_list = self.date_list
        # cal total lenth
        if len(date_list)%days_each_time == 0:
            total = len(date_list)//days_each_time
        else:
            total = len(date_list)//days_each_time+1
        with tqdm(total=total) as bar:
            while len(date_list):
                tmp_date_list = date_list[:days_each_time]
                date_list = date_list[days_each_time:]
                tmp_start_date = tmp_date_list[0].strftime("%Y-%m-%d")
                tmp_end_date = tmp_date_list[-1].strftime("%Y-%m-%d")
                reddit, _stock_name, twitter = self.gather_one_day_sentiment(tmp_start_date,tmp_end_date,stock = stock )
                self.reddit = pd.concat([self.reddit,reddit])
                self.twitter = pd.concat([self.twitter,twitter])
                bar.update(1)
        self.reddit = self.reddit.sort_values("atTime")
        self.twitter = self.twitter.sort_values("atTime")

    def gather_one_day_sentiment(self,start_date, end_date, stock = "APPL"):
        res  = self.finnhub_client.stock_social_sentiment(stock, _from=start_date, to=end_date)
        reddit = res["reddit"]
        symbol = res["symbol"]
        twitter = res["twitter"]
        reddit = pd.DataFrame(reddit)
        # print(reddit)
        
        twitter = pd.DataFrame(twitter)
        try:
            reddit["atTime"] = pd.to_datetime(reddit["atTime"],errors = "ignore")
            twitter["atTime"] = pd.to_datetime(twitter["atTime"],errors = "ignore")
        except:
            pass
        time.sleep(self.delay)
        return reddit,symbol,twitter