{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys\n", "sys.path.append(\"../../FinNLP\")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Eastmoney" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.social_media.eastmoney_streaming import Eastmoney_Streaming" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "pages = 3\n", "stock = \"600519\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading ... 0 1 2 " ] } ], "source": [ "downloader = Eastmoney_Streaming()\n", "downloader.download_streaming_stock(stock, pages)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(241, 92)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "downloader.dataframe.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
post_idpost_titlestockbar_codestockbar_namestockbar_typeuser_iduser_nicknameuser_extendinfospost_click_countpost_forward_count...relate_topiczwpage_flagsource_post_comment_countpost_atuserreply_listcontent_typerepost_statereptile_stateallow_likes_statepost_is_hot
01324058647贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元600519贵州茅台吧100.07344113638256342贵州茅台资讯{'user_accreditinfos': None, 'deactive': '0', ...379914...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

1 rows × 92 columns

\n", "
" ], "text/plain": [ " post_id post_title stockbar_code \\\n", "0 1324058647 贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元 600519 \n", "\n", " stockbar_name stockbar_type user_id user_nickname \\\n", "0 贵州茅台吧 100.0 7344113638256342 贵州茅台资讯 \n", "\n", " user_extendinfos post_click_count \\\n", "0 {'user_accreditinfos': None, 'deactive': '0', ... 3799 \n", "\n", " post_forward_count ... relate_topic zwpage_flag \\\n", "0 14 ... NaN NaN \n", "\n", " source_post_comment_count post_atuser reply_list content_type \\\n", "0 NaN NaN NaN NaN \n", "\n", " repost_state reptile_state allow_likes_state post_is_hot \n", "0 NaN NaN NaN NaN \n", "\n", "[1 rows x 92 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "downloader.dataframe.head(1)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
post_titleuser_nicknamestockbar_namepost_click_countpost_forward_countpost_comment_countpost_publish_timepost_last_timepost_display_time
0贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元贵州茅台资讯贵州茅台吧379914152023-06-25 22:17:502023-06-26 03:12:472023-06-25 22:17:50
1贵州茅台:贵州茅台2022年年度权益分派实施公告贵州茅台资讯贵州茅台吧642347172023-06-25 15:32:422023-06-26 00:57:392023-06-26 00:00:00
2将派发现金红利325.49亿元!贵州茅台上市以来累计分红超2000亿元贵州茅台资讯贵州茅台吧460102023-06-25 23:49:072023-06-25 23:49:072023-06-25 23:49:07
3茅台冰淇淋悄然卖数亿 年轻市场真被抓住了吗贵州茅台资讯贵州茅台吧261215112023-06-24 07:03:532023-06-25 18:48:212023-06-24 07:03:53
4白酒本周跌5.49%原因是什么?下周怎么看?NaNNaN101974252023-06-24 12:29:532023-06-25 23:12:492023-06-24 12:29:53
5本周持仓与下周交易计划满仓日记财富号评论吧547212023-06-25 20:30:542023-06-26 03:19:082023-06-25 20:30:54
6茅台酒的估值真的是高菩萨小跟班888贵州茅台吧33002023-06-26 03:02:142023-06-26 03:02:142023-06-26 03:02:14
7茅台里面的资金估计要出来支持一些中小微企业政策导向[吃瓜]菩萨小跟班888贵州茅台吧24002023-06-26 01:50:122023-06-26 01:50:122023-06-26 01:50:12
8每股市值收益率,还没有银行定期利息高呢。(远离泡沫浮云地震带)章鱼帝的智慧贵州茅台吧33012023-06-25 22:48:492023-06-26 01:20:042023-06-25 22:48:49
96月最后的倔强(浪潮信息,昆仑万维,鸿博股份)赛道复苏。夏夏爱美丽财富号评论吧24590342023-06-25 22:16:032023-06-26 00:45:532023-06-25 22:16:03
\n", "
" ], "text/plain": [ " post_title user_nickname stockbar_name \\\n", "0 贵州茅台:每股派25.911元 6月30日共计派发现金红利325.49亿元 贵州茅台资讯 贵州茅台吧 \n", "1 贵州茅台:贵州茅台2022年年度权益分派实施公告 贵州茅台资讯 贵州茅台吧 \n", "2 将派发现金红利325.49亿元!贵州茅台上市以来累计分红超2000亿元 贵州茅台资讯 贵州茅台吧 \n", "3 茅台冰淇淋悄然卖数亿 年轻市场真被抓住了吗 贵州茅台资讯 贵州茅台吧 \n", "4 白酒本周跌5.49%原因是什么?下周怎么看? NaN NaN \n", "5 本周持仓与下周交易计划 满仓日记 财富号评论吧 \n", "6 茅台酒的估值真的是高 菩萨小跟班888 贵州茅台吧 \n", "7 茅台里面的资金估计要出来支持一些中小微企业政策导向[吃瓜] 菩萨小跟班888 贵州茅台吧 \n", "8 每股市值收益率,还没有银行定期利息高呢。(远离泡沫浮云地震带) 章鱼帝的智慧 贵州茅台吧 \n", "9 6月最后的倔强(浪潮信息,昆仑万维,鸿博股份)赛道复苏。 夏夏爱美丽 财富号评论吧 \n", "\n", " post_click_count post_forward_count post_comment_count \\\n", "0 3799 14 15 \n", "1 6423 47 17 \n", "2 460 1 0 \n", "3 2612 15 11 \n", "4 10197 4 25 \n", "5 547 2 1 \n", "6 33 0 0 \n", "7 24 0 0 \n", "8 33 0 1 \n", "9 2459 0 34 \n", "\n", " post_publish_time post_last_time post_display_time \n", "0 2023-06-25 22:17:50 2023-06-26 03:12:47 2023-06-25 22:17:50 \n", "1 2023-06-25 15:32:42 2023-06-26 00:57:39 2023-06-26 00:00:00 \n", "2 2023-06-25 23:49:07 2023-06-25 23:49:07 2023-06-25 23:49:07 \n", "3 2023-06-24 07:03:53 2023-06-25 18:48:21 2023-06-24 07:03:53 \n", "4 2023-06-24 12:29:53 2023-06-25 23:12:49 2023-06-24 12:29:53 \n", "5 2023-06-25 20:30:54 2023-06-26 03:19:08 2023-06-25 20:30:54 \n", "6 2023-06-26 03:02:14 2023-06-26 03:02:14 2023-06-26 03:02:14 \n", "7 2023-06-26 01:50:12 2023-06-26 01:50:12 2023-06-26 01:50:12 \n", "8 2023-06-25 22:48:49 2023-06-26 01:20:04 2023-06-25 22:48:49 \n", "9 2023-06-25 22:16:03 2023-06-26 00:45:53 2023-06-25 22:16:03 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "selected_columns = [\"post_title\",\"user_nickname\", \"stockbar_name\" ,\"post_click_count\", \"post_forward_count\", \"post_comment_count\", \"post_publish_time\", \"post_last_time\", \"post_display_time\"]\n", "downloader.dataframe[selected_columns].head(10)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Facebook get cookies" ] }, { "cell_type": 
"code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from selenium import webdriver\n", "import json\n", "\n", "browser = webdriver.ChromiumEdge()\n", "browser.get('https://www.facebook.com')\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "#### Please login your account in the brower" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cookies = browser.get_cookies() \n", "with open(\"cookies.json\", \"w\", encoding=\"utf-8\") as cks:\n", " json.dump(cookies, cks)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Facebook" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.social_media.facebook_streaming import Facebook_Streaming\n", "import json" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# load cookies\n", "with open(\"cookies.json\", \"r\", encoding=\"utf-8\") as cks: \n", " cookies = json.load(cks)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "config = {\n", " \"cookies\":cookies, \n", " \"headless\": False,\n", " \"stealth_path\":\"../../FinNLP/finnlp/data_sources/social_media/stealth.min.js\"\n", " }\n", "pages = 3\n", "stock = \"AAPL\"" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 17/17 [00:57<00:00, 3.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Only support the first page now!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "downloader = Facebook_Streaming(config)\n", "downloader.download_streaming_stock(stock, pages)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
contentdate
6AAPL (Stock Market)4h󰞋󰙷
8Day 7\\nIntroduction to Stock Market\\nWhat you ...6h󰞋󰙷
11US: AAPL new high and breakout from two-year r...1d󰞋󰙷
\n", "
" ], "text/plain": [ " content date\n", "6 AAPL (Stock Market) 4h󰞋󰙷\n", "8 Day 7\\nIntroduction to Stock Market\\nWhat you ... 6h󰞋󰙷\n", "11 US: AAPL new high and breakout from two-year r... 1d󰞋󰙷" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "downloader.dataframe" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Xueqiu / 雪球" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.social_media.xueqiu_streaming import Xueqiu_Streaming" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "pages = 3\n", "stock = \"茅台\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading ... 0 1 2 " ] } ], "source": [ "downloader = Xueqiu_Streaming()\n", "downloader.download_streaming_stock(stock, pages)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(29, 53)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "downloader.dataframe.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
blockedblockingcanEditcommentIdcontroversialcreated_atdescriptiondonate_countdonate_snowcoineditable...truncated_bytypeuseruser_idview_countfirstImgpic_sizesedited_atquote_cardssymbol_id
0FalseFalseTrue0False2023-06-25 12:15:07<a href=\"http://xueqiu.com/S/SZ000860\" target=...00True...02{'allow_all_stock': False, 'block_status': 0, ...8364804052471NaNNaNNaNNaNNaN
\n", "

1 rows × 53 columns

\n", "
" ], "text/plain": [ " blocked blocking canEdit commentId controversial created_at \\\n", "0 False False True 0 False 2023-06-25 12:15:07 \n", "\n", " description donate_count \\\n", "0 \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
created_atdescriptiontitletexttargetsourceuser
02023-06-25 12:15:07<a href=\"http://xueqiu.com/S/SZ000860\" target=...<a href=\"http://xueqiu.com/S/SZ000860\" target=.../8364804052/253976413Android{'allow_all_stock': False, 'block_status': 0, ...
12023-06-25 12:14:22<a href=\"http://xueqiu.com/S/SH600519\" target=...<p><a href=\"http://xueqiu.com/S/SH600519\" targ.../4631817224/253976390雪球{'allow_all_stock': False, 'block_status': 0, ...
22023-06-25 12:13:01...提高。白酒:五粮液、迎驾贡酒、<span class='highlight'>茅台</...6.25 赛道和白马的机会<p>这个假期外围的环境不太好,已经是基本共识了。明天开盘大A承压低开也基本是一致预期。这么.../4322952939/253976335雪球{'allow_all_stock': False, 'block_status': 0, ...
32023-06-25 11:58:55茅台发生活费了茅台发生活费了<br/><img class=\"ke_img\" src=\"https://x.../4653939718/253975764iPhone{'allow_all_stock': False, 'block_status': 0, ...
42023-06-25 11:54:05...业绩及股价,形成正反馈。当年<span class='highlight'>茅台</s...持仓吹票,共同致富<p><a href=\"http://xueqiu.com/k?q=%23%E4%BB%A5.../8113901491/253975613Android{'allow_all_stock': False, 'block_status': 0, ...
52023-06-25 11:50:11微酒酒业快讯,6月25日,酒业新闻一览-·企业动态·-01<span class='high...6.25:<span class='highlight'>茅</span><span cla...<p><img class=\"ke_img\" src=\"https://xqimg.imed.../3615583399/253975485雪球{'allow_all_stock': False, 'block_status': 0, ...
62023-06-25 11:48:42<a href=\"http://xueqiu.com/S/SH603027\" target=...<a href=\"http://xueqiu.com/S/SH603027\" target=.../2659542807/253975430iPhone{'allow_all_stock': False, 'block_status': 0, ...
72023-06-25 11:45:54段永平说:我不鼓励小散投<a href=\"https://xueqiu.com/S/AAPL...段永平说:我不鼓励小散投<a href=\"https://xueqiu.com/S/AAPL.../9456980430/253975338iPhone{'allow_all_stock': False, 'block_status': 0, ...
82023-06-25 11:33:01泸州老窖酒传统酿制技艺第二十三代传承人·国窖1573·曾娜大师鉴藏版,端午举杯小酒。<br/...泸州老窖酒传统酿制技艺第二十三代传承人·国窖1573·曾娜大师鉴藏版,端午举杯小酒。<br/.../9893982765/253974916Android{'allow_all_stock': False, 'block_status': 0, ...
92023-06-25 11:25:44...酒店中,白酒卖得最好的往往不是<span class='highlight'>茅台</...街头没生意的烟酒店,为什么不会倒闭<p><img class=\"ke_img\" src=\"https://xqimg.imed.../5497522856/253974630雪球{'allow_all_stock': False, 'block_status': 0, ...
\n", "" ], "text/plain": [ " created_at description \\\n", "0 2023-06-25 12:15:07
茅台茅台茅台这个假期外围的环境不太好,已经是基本共识了。明天开盘大A承压低开也基本是一致预期。这么... /4322952939/253976335 \n", "3 茅台发生活费了
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idbodycreated_atusersourcesymbolspricesmentioned_usersentitiesliked_by_selfreshared_by_selflinksreshare_messageconversationlikesresharesnetwork
0522005335NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL...2023-04-07T15:24:22Z{'id': 4744627, 'username': 'JavierAyala', 'na...{'id': 1149, 'title': 'StockTwits for iOS', 'u...[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...[]{'sentiment': None}FalseFalseNaNNaNNaNNaNNaNNaN
1522004768$AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi...2023-04-07T15:17:43Z{'id': 6330207, 'username': 'PlainFacts_2121',...{'id': 2269, 'title': 'StockTwits Web', 'url':...[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...[{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '...[]{'sentiment': None}FalseFalse[{'title': 'China officials who abused health ...NaNNaNNaNNaNNaN
\n", "" ], "text/plain": [ " id body \\\n", "0 522005335 NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL... \n", "1 522004768 $AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi... \n", "\n", " created_at user \\\n", "0 2023-04-07T15:24:22Z {'id': 4744627, 'username': 'JavierAyala', 'na... \n", "1 2023-04-07T15:17:43Z {'id': 6330207, 'username': 'PlainFacts_2121',... \n", "\n", " source \\\n", "0 {'id': 1149, 'title': 'StockTwits for iOS', 'u... \n", "1 {'id': 2269, 'title': 'StockTwits Web', 'url':... \n", "\n", " symbols \\\n", "0 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... \n", "1 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... \n", "\n", " prices mentioned_users \\\n", "0 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... [] \n", "1 [{'id': 686, 'symbol': 'AAPL', 'symbol_mic': '... [] \n", "\n", " entities liked_by_self reshared_by_self \\\n", "0 {'sentiment': None} False False \n", "1 {'sentiment': None} False False \n", "\n", " links reshare_message \\\n", "0 NaN NaN \n", "1 [{'title': 'China officials who abused health ... NaN \n", "\n", " conversation likes reshares network \n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = downloader.dataframe\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
created_atbody
02023-04-07T15:24:22ZNANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL...
12023-04-07T15:17:43Z$AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi...
22023-04-07T15:17:25Z$AAPL $GOOG $AMZN I took a Trump today. \\n\\nH...
32023-04-07T15:16:54Z$SPY $AAPL will take this baby down, time for ...
42023-04-07T15:11:37Z$SPY $3T it ALREADY DID - look at the pre-COV...
52023-04-07T15:10:29Z$AAPL $QQQ $STUDY We are on to the next one! A...
62023-04-07T15:06:00Z$AAPL was analyzed by 48 analysts. The buy con...
72023-04-07T14:54:29Z$AAPL both retiring. \\n \\nCraig....
82023-04-07T14:40:06Z$SPY $QQQ $TSLA $AAPL SPY 500 HAS STARTED🚀😍 BI...
92023-04-07T14:38:57ZNancy 🩵 (Tim) $AAPL
\n", "
" ], "text/plain": [ " created_at body\n", "0 2023-04-07T15:24:22Z NANCY PELOSI JUST BOUGHT 10,000 SHARES OF APPL...\n", "1 2023-04-07T15:17:43Z $AAPL $SPY \\n \\nhttps://amp.scmp.com/news/chi...\n", "2 2023-04-07T15:17:25Z $AAPL $GOOG $AMZN I took a Trump today. \\n\\nH...\n", "3 2023-04-07T15:16:54Z $SPY $AAPL will take this baby down, time for ...\n", "4 2023-04-07T15:11:37Z $SPY $3T it ALREADY DID - look at the pre-COV...\n", "5 2023-04-07T15:10:29Z $AAPL $QQQ $STUDY We are on to the next one! A...\n", "6 2023-04-07T15:06:00Z $AAPL was analyzed by 48 analysts. The buy con...\n", "7 2023-04-07T14:54:29Z $AAPL both retiring. \\n \\nCraig....\n", "8 2023-04-07T14:40:06Z $SPY $QQQ $TSLA $AAPL SPY 500 HAS STARTED🚀😍 BI...\n", "9 2023-04-07T14:38:57Z Nancy 🩵 (Tim) $AAPL" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "selected_columns = [\"created_at\", \"body\"]\n", "df[selected_columns].head(10)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Reddit Wallstreetbets Streaming" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.social_media.reddit_streaming import Reddit_Streaming" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "pages = 3\n", "config = {\n", " # \"use_proxy\": \"us_free\",\n", " \"max_retry\": 5,\n", " \"proxy_pages\": 2,\n", "}" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Downloading by pages...: 100%|██████████| 3/3 [00:08<00:00, 2.83s/it]\n" ] } ], "source": [ "downloader = Reddit_Streaming(config)\n", "downloader.download_streaming_all(pages)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnumCommentscreatedscoredistinguishTypeisLockedisStickiedthumbnailtitleauthor...postEventInfopredictionTournamentreactedFromremovedByremovedByCategorysubredditsuggestedCommentSorttopAwardedTypeurlwhitelistStatus
0t3_12epaq0816808819740000NoneFalseFalse{'url': 'https://b.thumbs.redditmedia.com/W8hd...Y’all making me feel like spoodermanghostwholags...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1t3_zr9v10016715957820002NoneTrueFalse{'url': 'https://b.thumbs.redditmedia.com/dJqb...Do you track your investments in a spreadsheet...sharesight...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

2 rows × 100 columns

\n", "
" ], "text/plain": [ " id numComments created score distinguishType isLocked \\\n", "0 t3_12epaq0 8 1680881974000 0 None False \n", "1 t3_zr9v10 0 1671595782000 2 None True \n", "\n", " isStickied thumbnail \\\n", "0 False {'url': 'https://b.thumbs.redditmedia.com/W8hd... \n", "1 False {'url': 'https://b.thumbs.redditmedia.com/dJqb... \n", "\n", " title author ... \\\n", "0 Y’all making me feel like spooderman ghostwholags ... \n", "1 Do you track your investments in a spreadsheet... sharesight ... \n", "\n", " postEventInfo predictionTournament reactedFrom removedBy removedByCategory \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "\n", " subreddit suggestedCommentSort topAwardedType url whitelistStatus \n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "\n", "[2 rows x 100 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = downloader.dataframe\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnumCommentscreatedscoredistinguishTypeisLockedisStickiedthumbnailtitleauthor...postEventInfopredictionTournamentreactedFromremovedByremovedByCategorysubredditsuggestedCommentSorttopAwardedTypeurlwhitelistStatus
0t3_12epaq082023-04-07 15:39:340NoneFalseFalse{'url': 'https://b.thumbs.redditmedia.com/W8hd...Y’all making me feel like spoodermanghostwholags...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1t3_zr9v1002022-12-21 04:09:422NoneTrueFalse{'url': 'https://b.thumbs.redditmedia.com/dJqb...Do you track your investments in a spreadsheet...sharesight...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

2 rows × 100 columns

\n", "
" ], "text/plain": [ " id numComments created score distinguishType isLocked \\\n", "0 t3_12epaq0 8 2023-04-07 15:39:34 0 None False \n", "1 t3_zr9v10 0 2022-12-21 04:09:42 2 None True \n", "\n", " isStickied thumbnail \\\n", "0 False {'url': 'https://b.thumbs.redditmedia.com/W8hd... \n", "1 False {'url': 'https://b.thumbs.redditmedia.com/dJqb... \n", "\n", " title author ... \\\n", "0 Y’all making me feel like spooderman ghostwholags ... \n", "1 Do you track your investments in a spreadsheet... sharesight ... \n", "\n", " postEventInfo predictionTournament reactedFrom removedBy removedByCategory \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "\n", " subreddit suggestedCommentSort topAwardedType url whitelistStatus \n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "\n", "[2 rows x 100 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "df[\"created\"] = pd.to_datetime(df[\"created\"], unit = \"ms\")\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
createdtitle
02023-04-07 15:39:34Y’all making me feel like spooderman
12022-12-21 04:09:42Do you track your investments in a spreadsheet...
22022-12-21 04:09:42Do you track your investments in a spreadsheet...
32023-04-07 15:29:23Can a Blackberry holder get some help 🥺
42023-04-07 14:49:55The week of CPI and FOMC Minutes… 4-6-23 SPY/ ...
52023-04-07 14:19:22Well let’s hope your job likes you, thanks Jerome
62023-04-07 14:06:32Does anyone else feel an overwhelming sense of...
72023-04-07 13:47:59Watermarked Jesus explains the market being cl...
82023-04-07 13:26:23Jobs report shows 236,000 gain in March. Hot l...
92023-04-07 13:07:15The recession is over! Let's buy more stocks!
\n", "
" ], "text/plain": [ " created title\n", "0 2023-04-07 15:39:34 Y’all making me feel like spooderman\n", "1 2022-12-21 04:09:42 Do you track your investments in a spreadsheet...\n", "2 2022-12-21 04:09:42 Do you track your investments in a spreadsheet...\n", "3 2023-04-07 15:29:23 Can a Blackberry holder get some help 🥺\n", "4 2023-04-07 14:49:55 The week of CPI and FOMC Minutes… 4-6-23 SPY/ ...\n", "5 2023-04-07 14:19:22 Well let’s hope your job likes you, thanks Jerome\n", "6 2023-04-07 14:06:32 Does anyone else feel an overwhelming sense of...\n", "7 2023-04-07 13:47:59 Watermarked Jesus explains the market being cl...\n", "8 2023-04-07 13:26:23 Jobs report shows 236,000 gain in March. Hot l...\n", "9 2023-04-07 13:07:15 The recession is over! Let's buy more stocks!" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "selected_columns = [\"created\", \"title\"]\n", "df[selected_columns].head(10)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Weibo Date Range" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.social_media.weibo_date_range import Weibo_Date_Range" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "start_date = \"2016-01-01\"\n", "end_date = \"2016-01-02\"\n", "stock = \"茅台\"\n", "config = {\n", " \"use_proxy\": \"china_free\",\n", " \"max_retry\": 5,\n", " \"proxy_pages\": 5,\n", " \"cookies\": \"Your_Login_Cookies\",\n", "}\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Gathering free ips by pages...: 100%|██████████| 5/5 [00:09<00:00, 1.95s/it]\n", "Checking ips: 100%|██████████| 75/75 [01:23<00:00, 1.11s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "获取到的代理ip数量: 75 。Get proxy ips: 75.\n", "能用的代理数量: 13。Usable proxy ips: 13.\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "Downloading by dates...: 100%|██████████| 2/2 [01:03<00:00, 31.56s/it]\n" ] } ], "source": [ "downloader = Weibo_Date_Range(config)\n", "downloader.download_date_range_stock(start_date, end_date, stock = stock)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datedate_contentsourcecontent
02016-01-012016年01月01日23:41Moto X#舆论之锤#唯品会发声明证实销售假茅台-手机腾讯网O网页链接分享来自浏览器!
22016-01-012016年01月01日22:57新浪博客2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原...
62016-01-012016年01月01日22:56新浪博客2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原...
172016-01-012016年01月01日22:40五蕴皆崆Android开心,今天喝了两斤酒(茅台+扎二)三个人,开心!
182016-01-01NaNNaN一家专卖假货的网站某宝,你该学学了!//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品...
192016-01-01NaNNaN一家专卖假货的网站//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品会售假茅台:供货商...
202016-01-012016年01月01日21:46360安全浏览器前几天说了几点不看好茅台的理由,今年过节喝点茅台支持下,个人口感,茅台比小五好喝,茅台依然是...
212016-01-012016年01月01日21:44华为P8老杜酱酒已到货,从明天起正式在甘肃武威开卖。可以不相信我说的话,但一定不要怀疑@杜子建的为人...
222016-01-012016年01月01日21:24华为Ascend P7【唯品会售假茅台后续:供货商被刑拘顾客获十倍补偿】此前,有网友投诉其在唯品会购买的茅台酒质量...
232016-01-012016年01月01日21:16实得惠省钱网唯品会卖假茅台,供货商被刑拘,买家获十倍补偿8888元|此前,有网友在网络论坛发贴(唯品会宣...
\n", "
" ], "text/plain": [ " date date_content source \\\n", "0 2016-01-01 2016年01月01日23:41 Moto X \n", "2 2016-01-01 2016年01月01日22:57 新浪博客 \n", "6 2016-01-01 2016年01月01日22:56 新浪博客 \n", "17 2016-01-01 2016年01月01日22:40 五蕴皆崆Android \n", "18 2016-01-01 NaN NaN \n", "19 2016-01-01 NaN NaN \n", "20 2016-01-01 2016年01月01日21:46 360安全浏览器 \n", "21 2016-01-01 2016年01月01日21:44 华为P8 \n", "22 2016-01-01 2016年01月01日21:24 华为Ascend P7 \n", "23 2016-01-01 2016年01月01日21:16 实得惠省钱网 \n", "\n", " content \n", "0 #舆论之锤#唯品会发声明证实销售假茅台-手机腾讯网O网页链接分享来自浏览器! \n", "2 2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原... \n", "6 2016元旦节快乐酒粮网官方新品首发,茅台镇老酒,酱香原浆酒:酒粮网茅台镇白酒酱香老酒纯粮原... \n", "17 开心,今天喝了两斤酒(茅台+扎二)三个人,开心! \n", "18 一家专卖假货的网站某宝,你该学学了!//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品... \n", "19 一家专卖假货的网站//【唯品会售假茅台:供货商被刑拘顾客获十倍补偿】O唯品会售假茅台:供货商... \n", "20 前几天说了几点不看好茅台的理由,今年过节喝点茅台支持下,个人口感,茅台比小五好喝,茅台依然是... \n", "21 老杜酱酒已到货,从明天起正式在甘肃武威开卖。可以不相信我说的话,但一定不要怀疑@杜子建的为人... \n", "22 【唯品会售假茅台后续:供货商被刑拘顾客获十倍补偿】此前,有网友投诉其在唯品会购买的茅台酒质量... \n", "23 唯品会卖假茅台,供货商被刑拘,买家获十倍补偿8888元|此前,有网友在网络论坛发贴(唯品会宣... 
" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = downloader.dataframe\n", "df = df.drop_duplicates()\n", "df.head(10)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(60, 4)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Weibo Streaming" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.social_media.weibo_streaming import Weibo_Streaming" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "rounds = 3\n", "stock = \"茅台\"\n", "config = {\n", " \"use_proxy\": \"china_free\",\n", " \"max_retry\": 5,\n", " \"proxy_pages\": 5,\n", " \"cookies\": \"Your_Login_Cookies\",\n", "}\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Gathering free ips by pages...: 100%|██████████| 5/5 [00:09<00:00, 1.98s/it]\n", "Checking ips: 100%|██████████| 75/75 [01:26<00:00, 1.15s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "获取到的代理ip数量: 75 。Get proxy ips: 75.\n", "能用的代理数量: 19。Usable proxy ips: 19.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processing the text content and downloading the full passage...: 100%|██████████| 9/9 [00:00<00:00, 64.89it/s]\n", "Processing the text content and downloading the full passage...: 100%|██████████| 10/10 [00:09<00:00, 1.07it/s]\n", "Processing the text content and downloading the full passage...: 100%|██████████| 10/10 [00:02<00:00, 4.93it/s]\n", "Downloading by page..: 100%|██████████| 3/3 [00:19<00:00, 6.46s/it]\n" ] } ], "source": [ "downloader = Weibo_Streaming(config)\n", "downloader.download_streaming_stock(stock = stock, rounds = rounds)" ] }, { "cell_type": 
"code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
card_typedisplay_followbtnmblogitemidactionlogcate_iddisplay_arrowshow_typeschemecontainer_colorcontainer_color_darkcontent_shortcontent
09False{'attitudes_count': 0, 'can_edit': False, 'com...seqid:187118896|type:61|t:|pos:1-0-0|q:茅台|srid...{'act_code': 554, 'ext': 'seqid:187118896|type...3101https://m.weibo.cn/status/MAWMprpPp?mblogid=MA...#EEEEEE#151515事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...
19False{'attitudes_count': 0, 'can_edit': False, 'com...seqid:187118896|type:61|t:|pos:1-0-1|q:茅台|srid...{'act_code': 554, 'ext': 'seqid:187118896|type...3101https://m.weibo.cn/status/MAWHVDm0H?mblogid=MA...#EEEEEE#151515茅台茅台成都收4瓶飞天,自提茅台茅台成都收4瓶飞天,自提
\n", "
" ], "text/plain": [ " card_type display_followbtn \\\n", "0 9 False \n", "1 9 False \n", "\n", " mblog \\\n", "0 {'attitudes_count': 0, 'can_edit': False, 'com... \n", "1 {'attitudes_count': 0, 'can_edit': False, 'com... \n", "\n", " itemid \\\n", "0 seqid:187118896|type:61|t:|pos:1-0-0|q:茅台|srid... \n", "1 seqid:187118896|type:61|t:|pos:1-0-1|q:茅台|srid... \n", "\n", " actionlog cate_id display_arrow \\\n", "0 {'act_code': 554, 'ext': 'seqid:187118896|type... 31 0 \n", "1 {'act_code': 554, 'ext': 'seqid:187118896|type... 31 0 \n", "\n", " show_type scheme \\\n", "0 1 https://m.weibo.cn/status/MAWMprpPp?mblogid=MA... \n", "1 1 https://m.weibo.cn/status/MAWHVDm0H?mblogid=MA... \n", "\n", " container_color container_color_dark \\\n", "0 #EEEEEE #151515 \n", "1 #EEEEEE #151515 \n", "\n", " content_short \\\n", "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n", "1 茅台茅台成都收4瓶飞天,自提 \n", "\n", " content \n", "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n", "1 茅台茅台成都收4瓶飞天,自提 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The downloaded frame's row labels are not unique (they appear to restart for each\n", "# downloaded round), so rebuild a clean 0..n-1 index before any label-based access.\n", "df = downloader.dataframe.reset_index(drop=True)\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
content_shortcontent
0事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市...
1茅台茅台成都收4瓶飞天,自提茅台茅台成都收4瓶飞天,自提
2我可太喜欢茅台这个防伪了我可太喜欢茅台这个防伪了
3没想到 4S店的二楼 是卖茅台的吧没想到 4S店的二楼 是卖茅台的吧
4买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场
5xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然...xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然...
6茅台 奎屯出一只兔茅茅台 奎屯出一只兔茅
72022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和...2022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和...
841岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+...41岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+...
0吃到了茅台冰激淋也吃到了茅台冰激淋也
\n", "
" ], "text/plain": [ " content_short \\\n", "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n", "1 茅台茅台成都收4瓶飞天,自提 \n", "2 我可太喜欢茅台这个防伪了 \n", "3 没想到 4S店的二楼 是卖茅台的吧 \n", "4 买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场 \n", "5 xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然... \n", "6 茅台 奎屯出一只兔茅 \n", "7 2022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和... \n", "8 41岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+... \n", "0 吃到了茅台冰激淋也 \n", "\n", " content \n", "0 事情做好做精,还可以赚大钱的生意才是好生意,而不是忙忙碌碌,最后一算账没赚多少!比如苹果的市... \n", "1 茅台茅台成都收4瓶飞天,自提 \n", "2 我可太喜欢茅台这个防伪了 \n", "3 没想到 4S店的二楼 是卖茅台的吧 \n", "4 买不起茅台,砸锅卖铁也得买得起茅台冰淇淋 许昌·胖东来时代广场 \n", "5 xxx给我枇杷xxx给我蜂蜜 xxx偷茅台喝(假的)。我很喜欢自己家的产品,感觉很无害纯天然... \n", "6 茅台 奎屯出一只兔茅 \n", "7 2022胡润酒类品牌榜发布 2022胡润酒类品牌榜发布点评:与我印象中的有点出入。不出茅台和... \n", "8 41岁,很美妙!“爸爸生日快乐,吃个蛋糕🍰”小奶音听着上头。爱人,亲戚,朋友,草莓🍓,茅台+... \n", "0 吃到了茅台冰激淋也 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the two text fields and preview the first ten posts.\n", "selected_columns = [\"content_short\", \"content\"]\n", "df.loc[:, selected_columns].head(10)" ] } ], "metadata": { "kernelspec": { "display_name": "finrl", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.12" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }