{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append(\"../../FinNLP\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### SEC"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from finnlp.data_sources.company_announcement.sec import SEC_Announcement"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"start_date = \"2020-01-01\"\n",
"end_date = \"2020-06-01\"\n",
"stock = \"AAPL\"\n",
"config = {\n",
" \"use_proxy\": \"us_free\",\n",
" \"max_retry\": 5,\n",
" \"proxy_pages\": 3,\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Checking ips: 100%|██████████| 45/45 [01:42<00:00, 2.28s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Get proxy ips: 45.\n",
"Usable proxy ips: 44.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading by item...: 100%|██████████| 39/39 [01:39<00:00, 2.54s/it]\n"
]
}
],
"source": [
"downloader = SEC_Announcement(config)\n",
"downloader.download_date_range_stock(start_date, end_date, stock = stock)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" _id | \n",
" ciks | \n",
" period_ending | \n",
" root_form | \n",
" file_num | \n",
" display_names | \n",
" xsl | \n",
" sequence | \n",
" file_date | \n",
" biz_states | \n",
" sics | \n",
" form | \n",
" adsh | \n",
" film_num | \n",
" biz_locations | \n",
" file_type | \n",
" file_description | \n",
" inc_states | \n",
" ite | \n",
" content | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0000320193-20-000056:wf-form4_158932261319105.xml | \n",
" [0001631982, 0000320193] | \n",
" 2020-05-08 | \n",
" 4 | \n",
" [] | \n",
" [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... | \n",
" xslF345X03 | \n",
" 1 | \n",
" 2020-05-12 | \n",
" [] | \n",
" [3571] | \n",
" 4 | \n",
" 0000320193-20-000056 | \n",
" [] | \n",
" [, ] | \n",
" 4 | \n",
" FORM 4 | \n",
" [, CA, ] | \n",
" [] | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 1 | \n",
" 0000320193-20-000054:wf-form4_158829658358801.xml | \n",
" [0001051401, 0000320193] | \n",
" 2020-04-28 | \n",
" 4 | \n",
" [001-36743] | \n",
" [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... | \n",
" xslF345X03 | \n",
" 1 | \n",
" 2020-04-30 | \n",
" [CA] | \n",
" [3571] | \n",
" 4 | \n",
" 0000320193-20-000054 | \n",
" [20838087] | \n",
" [, Cupertino, CA] | \n",
" 4 | \n",
" FORM 4 | \n",
" [, CA] | \n",
" [] | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" _id \\\n",
"0 0000320193-20-000056:wf-form4_158932261319105.xml \n",
"1 0000320193-20-000054:wf-form4_158829658358801.xml \n",
"\n",
" ciks period_ending root_form file_num \\\n",
"0 [0001631982, 0000320193] 2020-05-08 4 [] \n",
"1 [0001051401, 0000320193] 2020-04-28 4 [001-36743] \n",
"\n",
" display_names xsl sequence \\\n",
"0 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... xslF345X03 1 \n",
"1 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... xslF345X03 1 \n",
"\n",
" file_date biz_states sics form adsh film_num \\\n",
"0 2020-05-12 [] [3571] 4 0000320193-20-000056 [] \n",
"1 2020-04-30 [CA] [3571] 4 0000320193-20-000054 [20838087] \n",
"\n",
" biz_locations file_type file_description inc_states ite \\\n",
"0 [, ] 4 FORM 4 [, CA, ] [] \n",
"1 [, Cupertino, CA] 4 FORM 4 [, CA] [] \n",
"\n",
" content \n",
"0 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"1 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = downloader.dataframe\n",
"# df = df.drop_duplicates()\n",
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(21, 20)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" file_date | \n",
" display_names | \n",
" content | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2020-05-12 | \n",
" [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 1 | \n",
" 2020-04-30 | \n",
" [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 2 | \n",
" 2020-04-17 | \n",
" [O'BRIEN DEIRDRE (CIK 0001767094), Apple Inc.... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 3 | \n",
" 2020-04-17 | \n",
" [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 4 | \n",
" 2020-04-09 | \n",
" [Maestri Luca (CIK 0001513362), Apple Inc. (... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 5 | \n",
" 2020-04-03 | \n",
" [WILLIAMS JEFFREY E (CIK 0001496686), Apple I... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 6 | \n",
" 2020-04-03 | \n",
" [Maestri Luca (CIK 0001513362), Apple Inc. (... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 7 | \n",
" 2020-02-28 | \n",
" [WAGNER SUSAN (CIK 0001059235), Apple Inc. (... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 8 | \n",
" 2020-02-28 | \n",
" [LEVINSON ARTHUR D (CIK 0001214128), Apple In... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
" 9 | \n",
" 2020-02-28 | \n",
" [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... | \n",
" SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" file_date display_names \\\n",
"0 2020-05-12 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... \n",
"1 2020-04-30 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... \n",
"2 2020-04-17 [O'BRIEN DEIRDRE (CIK 0001767094), Apple Inc.... \n",
"3 2020-04-17 [KONDO CHRIS (CIK 0001631982), Apple Inc. (A... \n",
"4 2020-04-09 [Maestri Luca (CIK 0001513362), Apple Inc. (... \n",
"5 2020-04-03 [WILLIAMS JEFFREY E (CIK 0001496686), Apple I... \n",
"6 2020-04-03 [Maestri Luca (CIK 0001513362), Apple Inc. (... \n",
"7 2020-02-28 [WAGNER SUSAN (CIK 0001059235), Apple Inc. (... \n",
"8 2020-02-28 [LEVINSON ARTHUR D (CIK 0001214128), Apple In... \n",
"9 2020-02-28 [JUNG ANDREA (CIK 0001051401), Apple Inc. (A... \n",
"\n",
" content \n",
"0 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"1 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"2 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"3 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"4 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"5 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"6 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"7 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"8 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... \n",
"9 SEC Form 4 \\n FORM 4UNITED STATES SECURITIES... "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"selected_columns = [\"file_date\", \"display_names\", \"content\"]\n",
"df[selected_columns].head(10)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Juchao"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from finnlp.data_sources.company_announcement.juchao import Juchao_Announcement"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"start_date = \"2020-01-01\"\n",
"end_date = \"2020-06-01\"\n",
"stock = \"000001\"\n",
"config = {\n",
" \"use_proxy\": \"china_free\",\n",
" \"max_retry\": 5,\n",
" \"proxy_pages\": 3,\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Gathering free ips by pages...: 100%|██████████| 3/3 [00:05<00:00, 1.86s/it]\n",
"Checking ips: 100%|██████████| 45/45 [00:48<00:00, 1.09s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"获取到的代理ip数量: 45 。Get proxy ips: 45.\n",
"能用的代理数量: 6。Usable proxy ips: 6.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1bb13261e75147929b30222347ab9cc5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading by page...: 0%| | 0/2 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "011ea7c465ad4e1aaccf09714b8e3e19",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Getting the text data...: 0%| | 0/42 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"downloader = Juchao_Announcement(config)\n",
"downloader.download_date_range_stock(start_date, end_date, stock = stock, get_content = True, delate_pdf = True)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" secCode | \n",
" secName | \n",
" orgId | \n",
" announcementId | \n",
" announcementTitle | \n",
" announcementTime | \n",
" adjunctUrl | \n",
" adjunctSize | \n",
" adjunctType | \n",
" ... | \n",
" important | \n",
" batchNum | \n",
" announcementContent | \n",
" orgName | \n",
" tileSecName | \n",
" shortTitle | \n",
" announcementTypeName | \n",
" secNameList | \n",
" PDF_path | \n",
" Content | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" None | \n",
" 000001 | \n",
" 平安银行 | \n",
" gssz0000001 | \n",
" 1207862647 | \n",
" 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 | \n",
" 2020-05-27 | \n",
" finalpage/2020-05-27/1207862647.PDF | \n",
" 148 | \n",
" PDF | \n",
" ... | \n",
" None | \n",
" None | \n",
" | \n",
" None | \n",
" 平安银行 | \n",
" 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 | \n",
" None | \n",
" None | \n",
" removed | \n",
" 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
" 1 | \n",
" None | \n",
" 000001 | \n",
" 平安银行 | \n",
" gssz0000001 | \n",
" 1207843688 | \n",
" 2019年年度权益分派实施公告 | \n",
" 2020-05-22 | \n",
" finalpage/2020-05-22/1207843688.PDF | \n",
" 214 | \n",
" PDF | \n",
" ... | \n",
" None | \n",
" None | \n",
" | \n",
" None | \n",
" 平安银行 | \n",
" 2019年年度权益分派实施公告 | \n",
" None | \n",
" None | \n",
" removed | \n",
" 1 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
"
\n",
"
2 rows × 25 columns
\n",
"
"
],
"text/plain": [
" id secCode secName orgId announcementId \\\n",
"0 None 000001 平安银行 gssz0000001 1207862647 \n",
"1 None 000001 平安银行 gssz0000001 1207843688 \n",
"\n",
" announcementTitle announcementTime \\\n",
"0 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 2020-05-27 \n",
"1 2019年年度权益分派实施公告 2020-05-22 \n",
"\n",
" adjunctUrl adjunctSize adjunctType ... \\\n",
"0 finalpage/2020-05-27/1207862647.PDF 148 PDF ... \n",
"1 finalpage/2020-05-22/1207843688.PDF 214 PDF ... \n",
"\n",
" important batchNum announcementContent orgName tileSecName \\\n",
"0 None None None 平安银行 \n",
"1 None None None 平安银行 \n",
"\n",
" shortTitle announcementTypeName secNameList PDF_path \\\n",
"0 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 None None removed \n",
"1 2019年年度权益分派实施公告 None None removed \n",
"\n",
" Content \n",
"0 证券代码: 000001 证券简称:平安银行 ... \n",
"1 1 证券代码: 000001 证券简称:平安银行 ... \n",
"\n",
"[2 rows x 25 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = downloader.dataframe\n",
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(42, 25)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" announcementTime | \n",
" shortTitle | \n",
" Content | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2020-05-27 | \n",
" 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 | \n",
" 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
" 1 | \n",
" 2020-05-22 | \n",
" 2019年年度权益分派实施公告 | \n",
" 1 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
" 2 | \n",
" 2020-05-20 | \n",
" 关于获准发行小微企业贷款专项金融债券的公告 | \n",
" 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
" 3 | \n",
" 2020-05-16 | \n",
" 监事会决议公告 | \n",
" 1 证券代码: 000001 证券简称: 平安银行 ... | \n",
"
\n",
" \n",
" 4 | \n",
" 2020-05-15 | \n",
" 2019年年度股东大会决议公告 | \n",
" 1 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
" 5 | \n",
" 2020-05-15 | \n",
" 2019年年度股东大会的法律意见书 | \n",
" 北京总部 电话 : (86 -10) 8519 -1300 传真 : (86 -10... | \n",
"
\n",
" \n",
" 6 | \n",
" 2020-04-30 | \n",
" 中信证券股份有限公司、平安证券股份有限公司关于公司关联交易有关事项的核查意见 | \n",
" 1 中信证券股份有限公司 、平安证券股份有限 公司 关于平安银行股份有限公司 关联交易 有... | \n",
"
\n",
" \n",
" 7 | \n",
" 2020-04-30 | \n",
" 独立董事独立意见 | \n",
" 1 平安银行股份有限公司独立董事独立意见 根据《关于在上市公司建立独立董事制度的指导... | \n",
"
\n",
" \n",
" 8 | \n",
" 2020-04-30 | \n",
" 关联交易公告 | \n",
" 1 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
" 9 | \n",
" 2020-04-21 | \n",
" 2020年第一季度报告全文 | \n",
" 证券代码: 000001 证券简称:平安银行 ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" announcementTime shortTitle \\\n",
"0 2020-05-27 关于2020年第一期小型微型企业贷款专项金融债券发行完毕的公告 \n",
"1 2020-05-22 2019年年度权益分派实施公告 \n",
"2 2020-05-20 关于获准发行小微企业贷款专项金融债券的公告 \n",
"3 2020-05-16 监事会决议公告 \n",
"4 2020-05-15 2019年年度股东大会决议公告 \n",
"5 2020-05-15 2019年年度股东大会的法律意见书 \n",
"6 2020-04-30 中信证券股份有限公司、平安证券股份有限公司关于公司关联交易有关事项的核查意见 \n",
"7 2020-04-30 独立董事独立意见 \n",
"8 2020-04-30 关联交易公告 \n",
"9 2020-04-21 2020年第一季度报告全文 \n",
"\n",
" Content \n",
"0 证券代码: 000001 证券简称:平安银行 ... \n",
"1 1 证券代码: 000001 证券简称:平安银行 ... \n",
"2 证券代码: 000001 证券简称:平安银行 ... \n",
"3 1 证券代码: 000001 证券简称: 平安银行 ... \n",
"4 1 证券代码: 000001 证券简称:平安银行 ... \n",
"5 北京总部 电话 : (86 -10) 8519 -1300 传真 : (86 -10... \n",
"6 1 中信证券股份有限公司 、平安证券股份有限 公司 关于平安银行股份有限公司 关联交易 有... \n",
"7 1 平安银行股份有限公司独立董事独立意见 根据《关于在上市公司建立独立董事制度的指导... \n",
"8 1 证券代码: 000001 证券简称:平安银行 ... \n",
"9 证券代码: 000001 证券简称:平安银行 ... "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"selected_columns = [\"announcementTime\", \"shortTitle\",\"Content\"]\n",
"df[selected_columns].head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "finrl",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "afd6dc03c9be451573fc2885de79a969af6a24a159f11a3ead741ab7a9ff405f"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}