{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Company announcement data sources\n", "\n", "Download company announcements for one stock over a date range with two FinNLP downloaders, `SEC_Announcement` and `Juchao_Announcement`, and preview the resulting DataFrames." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Put the FinNLP checkout (two directories up, relative to the working directory) on the import path.\n", "import sys\n", "sys.path.append(\"../../FinNLP\")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### SEC\n", "\n", "Fetch announcements for one ticker with `SEC_Announcement`." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.company_announcement.sec import SEC_Announcement" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "start_date = \"2020-01-01\"\n", "end_date = \"2020-06-01\"\n", "stock = \"AAPL\"\n", "# Downloader options; the proxy keys presumably configure the free-proxy pool - verify against FinNLP.\n", "config = {\n", "    \"use_proxy\": \"us_free\",\n", "    \"max_retry\": 5,\n", "    \"proxy_pages\": 3,\n", "}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Checking ips: 100%|██████████| 45/45 [01:42<00:00, 2.28s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Get proxy ips: 45.\n", "Usable proxy ips: 44.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Downloading by item...: 100%|██████████| 39/39 [01:39<00:00, 2.54s/it]\n" ] } ], "source": [ "downloader = SEC_Announcement(config)\n", "downloader.download_date_range_stock(start_date, end_date, stock=stock)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idciksperiod_endingroot_formfile_numdisplay_namesxslsequencefile_datebiz_statessicsformadshfilm_numbiz_locationsfile_typefile_descriptioninc_statesitecontent
00000320193-20-000056:wf-form4_158932261319105.xml[0001631982, 0000320193]2020-05-084[][KONDO CHRIS (CIK 0001631982), Apple Inc. (A...xslF345X0312020-05-12[][3571]40000320193-20-000056[][, ]4FORM 4[, CA, ][]SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
10000320193-20-000054:wf-form4_158829658358801.xml[0001051401, 0000320193]2020-04-284[001-36743][JUNG ANDREA (CIK 0001051401), Apple Inc. (A...xslF345X0312020-04-30[CA][3571]40000320193-20-000054[20838087][, Cupertino, CA]4FORM 4[, CA][]SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
\n", "
" ], "text/plain": [ " _id \\\n", "0 0000320193-20-000056:wf-form4_158932261319105.xml \n", "1 0000320193-20-000054:wf-form4_158829658358801.xml \n", "\n", " ciks period_ending root_form file_num \\\n", "0 [0001631982, 0000320193] 2020-05-08 4 [] \n", "1 [0001051401, 0000320193] 2020-04-28 4 [001-36743] \n", "\n", " display_names xsl sequence \\\n", "0 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... xslF345X03 1 \n", "1 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... xslF345X03 1 \n", "\n", " file_date biz_states sics form adsh film_num \\\n", "0 2020-05-12 [] [3571] 4 0000320193-20-000056 [] \n", "1 2020-04-30 [CA] [3571] 4 0000320193-20-000054 [20838087] \n", "\n", " biz_locations file_type file_description inc_states ite \\\n", "0 [, ] 4 FORM 4 [, CA, ] [] \n", "1 [, Cupertino, CA] 4 FORM 4 [, CA] [] \n", "\n", " content \n", "0 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "1 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The downloader exposes what it collected as a DataFrame; preview the first two rows.\n", "df = downloader.dataframe\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(21, 20)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
file_datedisplay_namescontent
02020-05-12[KONDO CHRIS (CIK 0001631982), Apple Inc. (A...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
12020-04-30[JUNG ANDREA (CIK 0001051401), Apple Inc. (A...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
22020-04-17[O'BRIEN DEIRDRE (CIK 0001767094), Apple Inc....SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
32020-04-17[KONDO CHRIS (CIK 0001631982), Apple Inc. (A...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
42020-04-09[Maestri Luca (CIK 0001513362), Apple Inc. (...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
52020-04-03[WILLIAMS JEFFREY E (CIK 0001496686), Apple I...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
62020-04-03[Maestri Luca (CIK 0001513362), Apple Inc. (...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
72020-02-28[WAGNER SUSAN (CIK 0001059235), Apple Inc. (...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
82020-02-28[LEVINSON ARTHUR D (CIK 0001214128), Apple In...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
92020-02-28[JUNG ANDREA (CIK 0001051401), Apple Inc. (A...SEC Form 4 \\n FORM 4UNITED STATES SECURITIES...
\n", "
" ], "text/plain": [ " file_date display_names \\\n", "0 2020-05-12 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... \n", "1 2020-04-30 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... \n", "2 2020-04-17 [O'BRIEN DEIRDRE (CIK 0001767094), Apple Inc.... \n", "3 2020-04-17 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... \n", "4 2020-04-09 [Maestri Luca (CIK 0001513362), Apple Inc. (... \n", "5 2020-04-03 [WILLIAMS JEFFREY E (CIK 0001496686), Apple I... \n", "6 2020-04-03 [Maestri Luca (CIK 0001513362), Apple Inc. (... \n", "7 2020-02-28 [WAGNER SUSAN (CIK 0001059235), Apple Inc. (... \n", "8 2020-02-28 [LEVINSON ARTHUR D (CIK 0001214128), Apple In... \n", "9 2020-02-28 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... \n", "\n", " content \n", "0 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "1 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "2 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "3 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "4 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "5 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "6 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "7 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "8 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n", "9 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... 
" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "selected_columns = [\"file_date\", \"display_names\", \"content\"]\n", "df[selected_columns].head(10)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Juchao" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from finnlp.data_sources.company_announcement.juchao import Juchao_Announcement" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "start_date = \"2020-01-01\"\n", "end_date = \"2020-06-01\"\n", "stock = \"000001\"\n", "config = {\n", " \"use_proxy\": \"china_free\",\n", " \"max_retry\": 5,\n", " \"proxy_pages\": 3,\n", "}\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Gathering free ips by pages...: 100%|██████████| 3/3 [00:05<00:00, 1.86s/it]\n", "Checking ips: 100%|██████████| 45/45 [00:48<00:00, 1.09s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "获取到的代理ip数量: 45 。Get proxy ips: 45.\n", "能用的代理数量: 6。Usable proxy ips: 6.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1bb13261e75147929b30222347ab9cc5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading by page...: 0%| | 0/2 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idsecCodesecNameorgIdannouncementIdannouncementTitleannouncementTimeadjunctUrladjunctSizeadjunctType...importantbatchNumannouncementContentorgNametileSecNameshortTitleannouncementTypeNamesecNameListPDF_pathContent
0None000001平安银行gssz00000011207862647关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告2020-05-27finalpage/2020-05-27/1207862647.PDF148PDF...NoneNoneNone平安银行关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告NoneNoneremoved证券代码: 000001 证券简称:平安银行 ...
1None000001平安银行gssz000000112078436882019年年度权益分派实施公告2020-05-22finalpage/2020-05-22/1207843688.PDF214PDF...NoneNoneNone平安银行2019年年度权益分派实施公告NoneNoneremoved1 证券代码: 000001 证券简称:平安银行 ...
\n", "

2 rows × 25 columns

\n", "" ], "text/plain": [ " id secCode secName orgId announcementId \\\n", "0 None 000001 平安银行 gssz0000001 1207862647 \n", "1 None 000001 平安银行 gssz0000001 1207843688 \n", "\n", " announcementTitle announcementTime \\\n", "0 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 2020-05-27 \n", "1 2019年年度权益分派实施公告 2020-05-22 \n", "\n", " adjunctUrl adjunctSize adjunctType ... \\\n", "0 finalpage/2020-05-27/1207862647.PDF 148 PDF ... \n", "1 finalpage/2020-05-22/1207843688.PDF 214 PDF ... \n", "\n", " important batchNum announcementContent orgName tileSecName \\\n", "0 None None None 平安银行 \n", "1 None None None 平安银行 \n", "\n", " shortTitle announcementTypeName secNameList PDF_path \\\n", "0 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 None None removed \n", "1 2019年年度权益分派实施公告 None None removed \n", "\n", " Content \n", "0 证券代码: 000001 证券简称:平安银行 ... \n", "1 1 证券代码: 000001 证券简称:平安银行 ... \n", "\n", "[2 rows x 25 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = downloader.dataframe\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(42, 25)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
announcementTimeshortTitleContent
02020-05-27关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告证券代码: 000001 证券简称:平安银行 ...
12020-05-222019年年度权益分派实施公告1 证券代码: 000001 证券简称:平安银行 ...
22020-05-20关于获准发行小微企业贷款专项金融债券的公告证券代码: 000001 证券简称:平安银行 ...
32020-05-16监事会决议公告1 证券代码: 000001 证券简称: 平安银行 ...
42020-05-152019年年度股东大会决议公告1 证券代码: 000001 证券简称:平安银行 ...
52020-05-152019年年度股东大会的法律意见书北京总部 电话 : (86 -10) 8519 -1300 传真 : (86 -10...
62020-04-30中信证券股份有限公司、平安证券股份有限公司关于公司关联交易有关事项的核查意见1 中信证券股份有限公司 、平安证券股份有限 公司 关于平安银行股份有限公司 关联交易 有...
72020-04-30独立董事独立意见1 平安银行股份有限公司独立董事独立意见 根据《关于在上市公司建立独立董事制度的指导...
82020-04-30关联交易公告1 证券代码: 000001 证券简称:平安银行 ...
92020-04-212020年第一季度报告全文证券代码: 000001 证券简称:平安银行 ...
\n", "
" ], "text/plain": [ " announcementTime shortTitle \\\n", "0 2020-05-27 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 \n", "1 2020-05-22 2019年年度权益分派实施公告 \n", "2 2020-05-20 关于获准发行小微企业贷款专项金融债券的公告 \n", "3 2020-05-16 监事会决议公告 \n", "4 2020-05-15 2019年年度股东大会决议公告 \n", "5 2020-05-15 2019年年度股东大会的法律意见书 \n", "6 2020-04-30 中信证券股份有限公司、平安证券股份有限公司关于公司关联交易有关事项的核查意见 \n", "7 2020-04-30 独立董事独立意见 \n", "8 2020-04-30 关联交易公告 \n", "9 2020-04-21 2020年第一季度报告全文 \n", "\n", " Content \n", "0 证券代码: 000001 证券简称:平安银行 ... \n", "1 1 证券代码: 000001 证券简称:平安银行 ... \n", "2 证券代码: 000001 证券简称:平安银行 ... \n", "3 1 证券代码: 000001 证券简称: 平安银行 ... \n", "4 1 证券代码: 000001 证券简称:平安银行 ... \n", "5 北京总部 电话 : (86 -10) 8519 -1300 传真 : (86 -10... \n", "6 1 中信证券股份有限公司 、平安证券股份有限 公司 关于平安银行股份有限公司 关联交易 有... \n", "7 1 平安银行股份有限公司独立董事独立意见 根据《关于在上市公司建立独立董事制度的指导... \n", "8 1 证券代码: 000001 证券简称:平安银行 ... \n", "9 证券代码: 000001 证券简称:平安银行 ... " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Narrow the preview to the time, title and content columns.\n", "selected_columns = [\"announcementTime\", \"shortTitle\", \"Content\"]\n", "df[selected_columns].head(10)" ] } ], "metadata": { "kernelspec": { "display_name": "finrl", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.12" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "afd6dc03c9be451573fc2885de79a969af6a24a159f11a3ead741ab7a9ff405f" } } }, "nbformat": 4, "nbformat_minor": 2 }