{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Add the parent directory to the module search path; the finnlp import in the\n", "# next code cell presumably relies on it when run from this folder (TODO confirm).\n", "import sys\n", "\n", "sys.path.append(\"..\")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "#### Import" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.social_media.twitter import Twitter_Downloader" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "#### Config" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Query settings handed to downloader.download() further down.\n", "stock = \"AAPL\"\n", "start_date = \"2023-01-01\"\n", "end_date = \"2023-01-05\"" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "#### Downloader" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "downloader = Twitter_Downloader()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f95d73d68fff4354aadfd0482bb52952", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/5 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Run the download for the configured range and symbol; the result is read\n", "# back from downloader.dataframe in the following cells.\n", "downloader.download(start_date, end_date, stock)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(84, 38)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# (rows, columns) of the collected frame.\n", "downloader.dataframe.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | created_at | \n", "id | \n", "id_str | \n", "full_text | \n", "truncated | \n", "display_text_range | \n", "entities | \n", "extended_entities | \n", "source | \n", "in_reply_to_status_id | \n", "... | \n", "retweeted | \n", "possibly_sensitive | \n", "possibly_sensitive_editable | \n", "lang | \n", "supplemental_language | \n", "self_thread | \n", "quoted_status_id | \n", "quoted_status_id_str | \n", "quoted_status_permalink | \n", "card | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2023-01-01 01:25:43+00:00 | \n", "1609360184694157312 | \n", "1609360184694157312 | \n", "2022 was the birth this movement. 2023 is when... | \n", "False | \n", "[0, 147] | \n", "{'hashtags': [{'text': 'SPY', 'indices': [97, ... | \n", "NaN | \n", "<a href=\"https://mobile.twitter.com\" rel=\"nofo... | \n", "1609360182714241024 | \n", "... | \n", "False | \n", "NaN | \n", "NaN | \n", "en | \n", "None | \n", "{'id': 1609360176640925699, 'id_str': '1609360... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "2023-01-01 03:20:49+00:00 | \n", "1609389151253835777 | \n", "1609389151253835777 | \n", "たくさんコメントありがとうございました☺️ | \n", "False | \n", "[0, 21] | \n", "{'hashtags': [], 'symbols': [], 'user_mentions... | \n", "NaN | \n", "<a href=\"http://twitter.com/download/iphone\" r... | \n", "None | \n", "... | \n", "False | \n", "NaN | \n", "NaN | \n", "ja | \n", "None | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "2023-01-01 04:21:54+00:00 | \n", "1609404522803363846 | \n", "1609404522803363846 | \n", "The fall of Apple iphone market share.\\n- peak... | \n", "False | \n", "[0, 212] | \n", "{'hashtags': [], 'symbols': [{'text': 'AAPL', ... | \n", "{'media': [{'id': 1609404518500032514, 'id_str... | \n", "<a href=\"http://twitter.com/download/iphone\" r... | \n", "None | \n", "... | \n", "False | \n", "False | \n", "True | \n", "en | \n", "None | \n", "{'id': 1609404522803363846, 'id_str': '1609404... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "2023-01-01 04:32:23+00:00 | \n", "1609407163671400448 | \n", "1609407163671400448 | \n", "Apple iphone market share peaked in H1 2009 an... | \n", "False | \n", "[0, 160] | \n", "{'hashtags': [], 'symbols': [{'text': 'AAPL', ... | \n", "{'media': [{'id': 1609407158696972289, 'id_str... | \n", "<a href=\"http://twitter.com/download/iphone\" r... | \n", "1609404522803363846 | \n", "... | \n", "False | \n", "False | \n", "True | \n", "en | \n", "None | \n", "{'id': 1609404522803363846, 'id_str': '1609404... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "2023-01-01 04:43:47+00:00 | \n", "1609410032734711809 | \n", "1609410032734711809 | \n", "That sounds impossible if we look at how fast ... | \n", "False | \n", "[0, 272] | \n", "{'hashtags': [{'text': 'iPhone', 'indices': [2... | \n", "{'media': [{'id': 1609410028653645824, 'id_str... | \n", "<a href=\"http://twitter.com/download/iphone\" r... | \n", "1609407163671400448 | \n", "... | \n", "False | \n", "False | \n", "True | \n", "en | \n", "None | \n", "{'id': 1609404522803363846, 'id_str': '1609404... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
79 | \n", "2023-01-04 21:45:11+00:00 | \n", "1610754237004189710 | \n", "1610754237004189710 | \n", "APPLE $AAPL TO SIGN UP LUXSHARE TO PRODUCE IPH... | \n", "False | \n", "[0, 64] | \n", "{'hashtags': [], 'symbols': [{'text': 'AAPL', ... | \n", "NaN | \n", "<a href=\"https://mobile.twitter.com\" rel=\"nofo... | \n", "None | \n", "... | \n", "False | \n", "NaN | \n", "NaN | \n", "en | \n", "None | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
80 | \n", "2023-01-04 22:21:45+00:00 | \n", "1610763442092183585 | \n", "1610763442092183585 | \n", "$AAPL https://t.co/Fb8UbPUy9S | \n", "False | \n", "[0, 5] | \n", "{'hashtags': [], 'symbols': [{'text': 'AAPL', ... | \n", "{'media': [{'id': 1610763438053068835, 'id_str... | \n", "<a href=\"http://twitter.com/download/iphone\" r... | \n", "None | \n", "... | \n", "False | \n", "False | \n", "True | \n", "pl | \n", "None | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
81 | \n", "2023-01-04 22:53:05+00:00 | \n", "1610771324355346432 | \n", "1610771324355346432 | \n", "$AAPL This doesn't fix their demand issues fol... | \n", "False | \n", "[0, 49] | \n", "{'hashtags': [], 'symbols': [{'text': 'AAPL', ... | \n", "{'media': [{'id': 1610771243019689984, 'id_str... | \n", "<a href=\"https://mobile.twitter.com\" rel=\"nofo... | \n", "None | \n", "... | \n", "False | \n", "False | \n", "True | \n", "en | \n", "None | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
82 | \n", "2023-01-04 23:10:20+00:00 | \n", "1610775668924583936 | \n", "1610775668924583936 | \n", "These TOP companies have cash in the bank!\\n$A... | \n", "False | \n", "[0, 173] | \n", "{'hashtags': [], 'symbols': [{'text': 'AAPL', ... | \n", "NaN | \n", "<a href=\"http://twitter.com/download/iphone\" r... | \n", "None | \n", "... | \n", "False | \n", "NaN | \n", "NaN | \n", "en | \n", "None | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
83 | \n", "2023-01-04 23:10:46+00:00 | \n", "1610775777083006976 | \n", "1610775777083006976 | \n", "Darvas strategy. \\n\\nPart 15• \\n\\nAlways speak... | \n", "False | \n", "[0, 122] | \n", "{'hashtags': [], 'symbols': [{'text': 'MSFT', ... | \n", "{'media': [{'id': 1610775771181682690, 'id_str... | \n", "<a href=\"http://twitter.com/download/iphone\" r... | \n", "None | \n", "... | \n", "False | \n", "False | \n", "True | \n", "en | \n", "None | \n", "{'id': 1610775777083006976, 'id_str': '1610775... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
84 rows × 38 columns
\n", "