franz96521
committed on
Commit
·
a53f410
1
Parent(s):
3393cd3
test
Browse files- .gitattributes +2 -0
- AbstractGenerator.ipynb +434 -0
- AbstractGenerator/TrainigData/en.txt +3 -0
- AbstractGenerator/TrainigData/es.txt +3 -0
- AbstractGenerator/weights/run1/encoder.json +3 -0
- AbstractGenerator/weights/run1/events.out.tfevents.1648184225.FRANZ96521-W11 +3 -0
- AbstractGenerator/weights/run1/events.out.tfevents.1648184499.FRANZ96521-W11 +3 -0
- AbstractGenerator/weights/run1/events.out.tfevents.1648229481.FRANZ96521-W11 +3 -0
- AbstractGenerator/weights/run1/hparams.json +3 -0
- AbstractGenerator/weights/run1/vocab.bpe +3 -0
- Descarga.ipynb +278 -0
- PDF_a_TXT.ipynb +105 -0
- models/124M/checkpoint +3 -0
- models/124M/encoder.json +3 -0
- models/124M/hparams.json +3 -0
- models/124M/model.ckpt.data-00000-of-00001 +3 -0
- models/124M/model.ckpt.index +3 -0
- models/124M/model.ckpt.meta +3 -0
- models/124M/vocab.bpe +3 -0
- txt_to_csv.ipynb +662 -0
.gitattributes
CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
models/** filter=lfs diff=lfs merge=lfs -text
|
29 |
+
AbstractGenerator/** filter=lfs diff=lfs merge=lfs -text
|
AbstractGenerator.ipynb
ADDED
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"WARNING:tensorflow:From C:\\Users\\franz\\AppData\\Local\\Temp\\ipykernel_14092\\1198363771.py:6: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n",
|
13 |
+
"Instructions for updating:\n",
|
14 |
+
"Use `tf.config.list_physical_devices('GPU')` instead.\n",
|
15 |
+
"GPU is available\n"
|
16 |
+
]
|
17 |
+
}
|
18 |
+
],
|
19 |
+
"source": [
|
20 |
+
"\n",
|
21 |
+
"import gpt_2_simple as gpt2\n",
|
22 |
+
"import os\n",
|
23 |
+
"import tensorflow as tf\n",
|
24 |
+
"import pandas as pd\n",
|
25 |
+
"import re\n",
|
26 |
+
"print(\"GPU is\", \"available\" if tf.test.is_gpu_available() else \"NOT AVAILABLE\")"
|
27 |
+
]
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"cell_type": "code",
|
31 |
+
"execution_count": 2,
|
32 |
+
"metadata": {},
|
33 |
+
"outputs": [],
|
34 |
+
"source": [
|
35 |
+
"model_name = \"124M\"\n",
|
36 |
+
"if not os.path.isdir(os.path.join(\"models\", model_name)):\n",
|
37 |
+
"\tprint(f\"Downloading {model_name} model...\")\n",
|
38 |
+
"\tgpt2.download_gpt2(model_name=model_name) "
|
39 |
+
]
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"cell_type": "code",
|
43 |
+
"execution_count": 3,
|
44 |
+
"metadata": {},
|
45 |
+
"outputs": [],
|
46 |
+
"source": [
|
47 |
+
"path = 'AbstractGenerator/'\n",
|
48 |
+
"checkpoint_dir =path+'weights/'\n",
|
49 |
+
"data_path = path+'TrainigData/'\n",
|
50 |
+
"\n",
|
51 |
+
"\n",
|
52 |
+
"\n",
|
53 |
+
"file_name_en = 'en'\n",
|
54 |
+
"file_path_en = data_path+file_name_en\n",
|
55 |
+
"\n",
|
56 |
+
"file_name_es = 'es'\n",
|
57 |
+
"file_path_es = data_path+file_name_es\n",
|
58 |
+
"\n",
|
59 |
+
"\n",
|
60 |
+
"prefix= '<|startoftext|>'\n",
|
61 |
+
"sufix ='<|endoftext|>'"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cell_type": "markdown",
|
66 |
+
"metadata": {},
|
67 |
+
"source": [
|
68 |
+
"# create training data"
|
69 |
+
]
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"cell_type": "code",
|
73 |
+
"execution_count": 13,
|
74 |
+
"metadata": {},
|
75 |
+
"outputs": [],
|
76 |
+
"source": [
|
77 |
+
# Load the first 1000 rows of each per-language scientific-paper dump.
# Forward slashes work on Windows as well and avoid the bogus '\s'
# escape in the original 'CSV\scientific_paper_en.csv' path, which is a
# SyntaxWarning on Python 3.12+ and an error in future versions.
en = pd.read_csv('CSV/scientific_paper_en.csv')[0:1000]
es = pd.read_csv('CSV/scientific_paper_es.csv')[0:1000]
|
79 |
+
]
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"cell_type": "code",
|
83 |
+
"execution_count": 14,
|
84 |
+
"metadata": {},
|
85 |
+
"outputs": [],
|
86 |
+
"source": [
|
87 |
+
import codecs


def createTrainingData(ds, fileName='resumen.txt', path='TrainigData/',
                       prefix='<|startoftext|>', sufix='<|endoftext|>'):
    """Append one GPT-2 training example per row of *ds* to ``path+fileName``.

    Each example is laid out as::

        <prefix>\\n<text_no_abstract>ABSTRACT\\n<abstract>\\n<sufix>

    Parameters
    ----------
    ds : pandas.DataFrame
        Must have 'text_no_abstract' and 'abstract' string columns.
    fileName, path : str
        Output location. NOTE: the file is opened in append mode ('a'),
        so re-running the cell duplicates the data — delete the file first.
    prefix, sufix : str
        Document delimiters. Parameterized here (with the same values the
        notebook kept as globals) so the function is self-contained;
        existing calls are unaffected.
    """
    with codecs.open(path + fileName, 'a', 'utf-8') as f:
        # iterrows() is positionally safe; the original iterated ds.index
        # but indexed with .iloc[i], which silently reads the wrong rows
        # (or raises) whenever the index is not exactly 0..n-1.
        for _, row in ds.iterrows():
            f.write(prefix + "\n")
            f.write(row['text_no_abstract'])
            f.write("ABSTRACT\n")
            f.write(row['abstract'] + "\n")
            f.write(sufix)
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": 15,
|
102 |
+
"metadata": {},
|
103 |
+
"outputs": [],
|
104 |
+
"source": [
|
105 |
+
"createTrainingData(en,'en.txt',data_path)\n",
|
106 |
+
"createTrainingData(es,'es.txt',data_path)"
|
107 |
+
]
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"cell_type": "markdown",
|
111 |
+
"metadata": {},
|
112 |
+
"source": [
|
113 |
+
"# pretrained"
|
114 |
+
]
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"cell_type": "code",
|
118 |
+
"execution_count": null,
|
119 |
+
"metadata": {},
|
120 |
+
"outputs": [],
|
121 |
+
"source": [
|
122 |
+
"sess = gpt2.start_tf_sess()\n",
|
123 |
+
"gpt2.load_gpt2(sess,checkpoint_dir=checkpoint_dir,run_name='run1')"
|
124 |
+
]
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"cell_type": "markdown",
|
128 |
+
"metadata": {},
|
129 |
+
"source": [
|
130 |
+
"# train "
|
131 |
+
]
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"cell_type": "code",
|
135 |
+
"execution_count": 16,
|
136 |
+
"metadata": {},
|
137 |
+
"outputs": [],
|
138 |
+
"source": [
|
139 |
+
"tf.compat.v1.reset_default_graph()\n",
|
140 |
+
"sess = gpt2.start_tf_sess()"
|
141 |
+
]
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"cell_type": "markdown",
|
145 |
+
"metadata": {},
|
146 |
+
"source": [
|
147 |
+
"## en"
|
148 |
+
]
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"cell_type": "code",
|
152 |
+
"execution_count": null,
|
153 |
+
"metadata": {},
|
154 |
+
"outputs": [],
|
155 |
+
"source": [
|
156 |
+
"gpt2.finetune(sess,\n",
|
157 |
+
" file_path_en+'.txt',\n",
|
158 |
+
" model_name=model_name,\n",
|
159 |
+
" checkpoint_dir=checkpoint_dir, \n",
|
160 |
+
" steps=1000\n",
|
161 |
+
" ) "
|
162 |
+
]
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"cell_type": "markdown",
|
166 |
+
"metadata": {},
|
167 |
+
"source": [
|
168 |
+
"## es"
|
169 |
+
]
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"cell_type": "code",
|
173 |
+
"execution_count": 17,
|
174 |
+
"metadata": {},
|
175 |
+
"outputs": [
|
176 |
+
{
|
177 |
+
"name": "stdout",
|
178 |
+
"output_type": "stream",
|
179 |
+
"text": [
|
180 |
+
"Loading checkpoint models\\124M\\model.ckpt\n",
|
181 |
+
"INFO:tensorflow:Restoring parameters from models\\124M\\model.ckpt\n",
|
182 |
+
"Loading dataset...\n"
|
183 |
+
]
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"name": "stderr",
|
187 |
+
"output_type": "stream",
|
188 |
+
"text": [
|
189 |
+
"100%|██████████| 1/1 [00:51<00:00, 51.03s/it]\n"
|
190 |
+
]
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"name": "stdout",
|
194 |
+
"output_type": "stream",
|
195 |
+
"text": [
|
196 |
+
"dataset has 17511492 tokens\n",
|
197 |
+
"Training...\n"
|
198 |
+
]
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"ename": "ResourceExhaustedError",
|
202 |
+
"evalue": "Graph execution error:\n\nfailed to allocate memory\n\t [[{{node model/h10/attn/ArithmeticOptimizer/ReorderCastLikeAndValuePreserving_float_Cast_1}}]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.",
|
203 |
+
"output_type": "error",
|
204 |
+
"traceback": [
|
205 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
206 |
+
"\u001b[1;31mResourceExhaustedError\u001b[0m Traceback (most recent call last)",
|
207 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\tensorflow\\python\\client\\session.py:1377\u001b[0m, in \u001b[0;36mBaseSession._do_call\u001b[1;34m(self, fn, *args)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1375'>1376</a>\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1376'>1377</a>\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs)\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1377'>1378</a>\u001b[0m \u001b[39mexcept\u001b[39;00m errors\u001b[39m.\u001b[39mOpError \u001b[39mas\u001b[39;00m e:\n",
|
208 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\tensorflow\\python\\client\\session.py:1360\u001b[0m, in \u001b[0;36mBaseSession._do_run.<locals>._run_fn\u001b[1;34m(feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1358'>1359</a>\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_extend_graph()\n\u001b[1;32m-> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1359'>1360</a>\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_tf_sessionrun(options, feed_dict, fetch_list,\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1360'>1361</a>\u001b[0m target_list, run_metadata)\n",
|
209 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\tensorflow\\python\\client\\session.py:1453\u001b[0m, in \u001b[0;36mBaseSession._call_tf_sessionrun\u001b[1;34m(self, options, feed_dict, fetch_list, target_list, run_metadata)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1450'>1451</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_call_tf_sessionrun\u001b[39m(\u001b[39mself\u001b[39m, options, feed_dict, fetch_list, target_list,\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1451'>1452</a>\u001b[0m run_metadata):\n\u001b[1;32m-> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1452'>1453</a>\u001b[0m \u001b[39mreturn\u001b[39;00m tf_session\u001b[39m.\u001b[39;49mTF_SessionRun_wrapper(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_session, options, feed_dict,\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1453'>1454</a>\u001b[0m fetch_list, target_list,\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1454'>1455</a>\u001b[0m run_metadata)\n",
|
210 |
+
"\u001b[1;31mResourceExhaustedError\u001b[0m: failed to allocate memory\n\t [[{{node model/h10/attn/ArithmeticOptimizer/ReorderCastLikeAndValuePreserving_float_Cast_1}}]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.\n",
|
211 |
+
"\nDuring handling of the above exception, another exception occurred:\n",
|
212 |
+
"\u001b[1;31mResourceExhaustedError\u001b[0m Traceback (most recent call last)",
|
213 |
+
"\u001b[1;32mc:\\Users\\franz\\OneDrive\\Documentos\\GitHub\\Generador-de-abstracts\\AbstractGenerator.ipynb Cell 15'\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/franz/OneDrive/Documentos/GitHub/Generador-de-abstracts/AbstractGenerator.ipynb#ch0000014?line=0'>1</a>\u001b[0m gpt2\u001b[39m.\u001b[39;49mfinetune(sess,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/franz/OneDrive/Documentos/GitHub/Generador-de-abstracts/AbstractGenerator.ipynb#ch0000014?line=1'>2</a>\u001b[0m file_path_es\u001b[39m+\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m.txt\u001b[39;49m\u001b[39m'\u001b[39;49m,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/franz/OneDrive/Documentos/GitHub/Generador-de-abstracts/AbstractGenerator.ipynb#ch0000014?line=2'>3</a>\u001b[0m model_name\u001b[39m=\u001b[39;49mmodel_name,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/franz/OneDrive/Documentos/GitHub/Generador-de-abstracts/AbstractGenerator.ipynb#ch0000014?line=3'>4</a>\u001b[0m checkpoint_dir\u001b[39m=\u001b[39;49mcheckpoint_dir, \n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/franz/OneDrive/Documentos/GitHub/Generador-de-abstracts/AbstractGenerator.ipynb#ch0000014?line=4'>5</a>\u001b[0m steps\u001b[39m=\u001b[39;49m\u001b[39m1000\u001b[39;49m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/franz/OneDrive/Documentos/GitHub/Generador-de-abstracts/AbstractGenerator.ipynb#ch0000014?line=5'>6</a>\u001b[0m )\n",
|
214 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\gpt_2_simple\\gpt_2.py:339\u001b[0m, in \u001b[0;36mfinetune\u001b[1;34m(sess, dataset, steps, model_name, model_dir, combine, batch_size, learning_rate, accumulate_gradients, restore_from, run_name, checkpoint_dir, sample_every, sample_length, sample_num, multi_gpu, save_every, print_every, max_checkpoints, use_memory_saving_gradients, only_train_transformer_layers, optimizer, overwrite, reuse)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/gpt_2_simple/gpt_2.py?line=336'>337</a>\u001b[0m sess\u001b[39m.\u001b[39mrun(opt_reset)\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/gpt_2_simple/gpt_2.py?line=337'>338</a>\u001b[0m \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(accumulate_gradients):\n\u001b[1;32m--> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/gpt_2_simple/gpt_2.py?line=338'>339</a>\u001b[0m sess\u001b[39m.\u001b[39;49mrun(\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/gpt_2_simple/gpt_2.py?line=339'>340</a>\u001b[0m opt_compute, feed_dict\u001b[39m=\u001b[39;49m{context: sample_batch()})\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/gpt_2_simple/gpt_2.py?line=340'>341</a>\u001b[0m (v_loss, v_summary) \u001b[39m=\u001b[39m sess\u001b[39m.\u001b[39mrun((opt_apply, summary_loss))\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/gpt_2_simple/gpt_2.py?line=341'>342</a>\u001b[0m \u001b[39melse\u001b[39;00m:\n",
|
215 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\tensorflow\\python\\client\\session.py:967\u001b[0m, in \u001b[0;36mBaseSession.run\u001b[1;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=963'>964</a>\u001b[0m run_metadata_ptr \u001b[39m=\u001b[39m tf_session\u001b[39m.\u001b[39mTF_NewBuffer() \u001b[39mif\u001b[39;00m run_metadata \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=965'>966</a>\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=966'>967</a>\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_run(\u001b[39mNone\u001b[39;49;00m, fetches, feed_dict, options_ptr,\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=967'>968</a>\u001b[0m run_metadata_ptr)\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=968'>969</a>\u001b[0m \u001b[39mif\u001b[39;00m run_metadata:\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=969'>970</a>\u001b[0m proto_data \u001b[39m=\u001b[39m tf_session\u001b[39m.\u001b[39mTF_GetBuffer(run_metadata_ptr)\n",
|
216 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\tensorflow\\python\\client\\session.py:1190\u001b[0m, in \u001b[0;36mBaseSession._run\u001b[1;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1186'>1187</a>\u001b[0m \u001b[39m# We only want to really perform the run if fetches or targets are provided,\u001b[39;00m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1187'>1188</a>\u001b[0m \u001b[39m# or if the call is a partial run that specifies feeds.\u001b[39;00m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1188'>1189</a>\u001b[0m \u001b[39mif\u001b[39;00m final_fetches \u001b[39mor\u001b[39;00m final_targets \u001b[39mor\u001b[39;00m (handle \u001b[39mand\u001b[39;00m feed_dict_tensor):\n\u001b[1;32m-> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1189'>1190</a>\u001b[0m results \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_do_run(handle, final_targets, final_fetches,\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1190'>1191</a>\u001b[0m feed_dict_tensor, options, run_metadata)\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1191'>1192</a>\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1192'>1193</a>\u001b[0m results \u001b[39m=\u001b[39m []\n",
|
217 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\tensorflow\\python\\client\\session.py:1370\u001b[0m, in \u001b[0;36mBaseSession._do_run\u001b[1;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1366'>1367</a>\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call_tf_sessionprun(handle, feed_dict, fetch_list)\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1368'>1369</a>\u001b[0m \u001b[39mif\u001b[39;00m handle \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m-> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1369'>1370</a>\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_do_call(_run_fn, feeds, fetches, targets, options,\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1370'>1371</a>\u001b[0m run_metadata)\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1371'>1372</a>\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1372'>1373</a>\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_do_call(_prun_fn, handle, feeds, fetches)\n",
|
218 |
+
"File \u001b[1;32m~\\.conda\\envs\\tf-gpu\\lib\\site-packages\\tensorflow\\python\\client\\session.py:1396\u001b[0m, in \u001b[0;36mBaseSession._do_call\u001b[1;34m(self, fn, *args)\u001b[0m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1390'>1391</a>\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m'\u001b[39m\u001b[39monly supports NHWC tensor format\u001b[39m\u001b[39m'\u001b[39m \u001b[39min\u001b[39;00m message:\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1391'>1392</a>\u001b[0m message \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m (\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mA possible workaround: Try disabling Grappler optimizer\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1392'>1393</a>\u001b[0m \u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mby modifying the config for creating the session eg.\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1393'>1394</a>\u001b[0m \u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39msession_config.graph_options.rewrite_options.\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1394'>1395</a>\u001b[0m \u001b[39m'\u001b[39m\u001b[39mdisable_meta_optimizer = True\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m-> <a href='file:///c%3A/Users/franz/.conda/envs/tf-gpu/lib/site-packages/tensorflow/python/client/session.py?line=1395'>1396</a>\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mtype\u001b[39m(e)(node_def, op, message)\n",
|
219 |
+
"\u001b[1;31mResourceExhaustedError\u001b[0m: Graph execution error:\n\nfailed to allocate memory\n\t [[{{node model/h10/attn/ArithmeticOptimizer/ReorderCastLikeAndValuePreserving_float_Cast_1}}]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode."
|
220 |
+
]
|
221 |
+
}
|
222 |
+
],
|
223 |
+
"source": [
|
224 |
+
"gpt2.finetune(sess,\n",
|
225 |
+
" file_path_es+'.txt',\n",
|
226 |
+
" model_name=model_name,\n",
|
227 |
+
" checkpoint_dir=checkpoint_dir, \n",
|
228 |
+
" steps=1000\n",
|
229 |
+
" ) "
|
230 |
+
]
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"cell_type": "markdown",
|
234 |
+
"metadata": {},
|
235 |
+
"source": [
|
236 |
+
"# test"
|
237 |
+
]
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"cell_type": "markdown",
|
241 |
+
"metadata": {},
|
242 |
+
"source": [
|
243 |
+
"## en "
|
244 |
+
]
|
245 |
+
},
|
246 |
+
{
|
247 |
+
"cell_type": "code",
|
248 |
+
"execution_count": null,
|
249 |
+
"metadata": {},
|
250 |
+
"outputs": [],
|
251 |
+
"source": [
|
252 |
+
"text = \"\"\"Introduction and preliminaries\n",
|
253 |
+
"The focus of this paper is decompositions of (k, `)-sparse graphs into edge-disjoint subgraphs\n",
|
254 |
+
"that certify sparsity. We use graph to mean a multigraph, possibly with loops. We say that a\n",
|
255 |
+
"graph is (k, `)-sparse if no subset of n′ vertices spans more than kn′− ` edges in the graph; a\n",
|
256 |
+
"(k, `)-sparse graph with kn′− ` edges is (k, `)-tight. We call the range k ≤ `≤ 2k−1 the upper\n",
|
257 |
+
"range of sparse graphs and 0≤ `≤ k the lower range.\n",
|
258 |
+
"In this paper, we present efficient algorithms for finding decompositions that certify sparsity\n",
|
259 |
+
"in the upper range of `. Our algorithms also apply in the lower range, which was already ad-\n",
|
260 |
+
"dressed by [3, 4, 5, 6, 19]. A decomposition certifies the sparsity of a graph if the sparse graphs\n",
|
261 |
+
"and graphs admitting the decomposition coincide.\n",
|
262 |
+
"Our algorithms are based on a new characterization of sparse graphs, which we call the\n",
|
263 |
+
"pebble game with colors. The pebble game with colors is a simple graph construction rule that\n",
|
264 |
+
"produces a sparse graph along with a sparsity-certifying decomposition.\n",
|
265 |
+
"We define and study a canonical class of pebble game constructions, which correspond to\n",
|
266 |
+
"previously studied decompositions of sparse graphs into edge disjoint trees. Our results provide\n",
|
267 |
+
"a unifying framework for all the previously known special cases, including Nash-Williams-\n",
|
268 |
+
"Tutte and [7, 24]. Indeed, in the lower range, canonical pebble game constructions capture the\n",
|
269 |
+
"properties of the augmenting paths used in matroid union and intersection algorithms[5, 6].\n",
|
270 |
+
"Since the sparse graphs in the upper range are not known to be unions or intersections of the\n",
|
271 |
+
"matroids for which there are efficient augmenting path algorithms, these do not easily apply in\n",
|
272 |
+
"∗ Research of both authors funded by the NSF under grants NSF CCF-0430990 and NSF-DARPA CARGO\n",
|
273 |
+
"CCR-0310661 to the first author.\n",
|
274 |
+
"2 Ileana Streinu, Louis Theran\n",
|
275 |
+
"Term Meaning\n",
|
276 |
+
"Sparse graph G Every non-empty subgraph on n′ vertices has ≤ kn′− ` edges\n",
|
277 |
+
"Tight graph G G = (V,E) is sparse and |V |= n, |E|= kn− `\n",
|
278 |
+
"Block H in G G is sparse, and H is a tight subgraph\n",
|
279 |
+
"Component H of G G is sparse and H is a maximal block\n",
|
280 |
+
"Map-graph Graph that admits an out-degree-exactly-one orientation\n",
|
281 |
+
"(k, `)-maps-and-trees Edge-disjoint union of ` trees and (k− `) map-grpahs\n",
|
282 |
+
"`Tk Union of ` trees, each vertex is in exactly k of them\n",
|
283 |
+
"Set of tree-pieces of an `Tk induced on V ′ ⊂V Pieces of trees in the `Tk spanned by E(V ′)\n",
|
284 |
+
"Proper `Tk Every V ′ ⊂V contains ≥ ` pieces of trees from the `Tk\n",
|
285 |
+
"Table 1. Sparse graph and decomposition terminology used in this paper.\n",
|
286 |
+
"the upper range. Pebble game with colors constructions may thus be considered a strengthening\n",
|
287 |
+
"of augmenting paths to the upper range of matroidal sparse graphs.\n",
|
288 |
+
"1.1. Sparse graphs\n",
|
289 |
+
"\n",
|
290 |
+
"ABSTRACT\n",
|
291 |
+
"\"\"\""
|
292 |
+
]
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"cell_type": "code",
|
296 |
+
"execution_count": null,
|
297 |
+
"metadata": {},
|
298 |
+
"outputs": [],
|
299 |
+
"source": [
|
300 |
+
"gpt2.generate(sess,prefix=text,truncate=sufix,checkpoint_dir=checkpoint_dir,nsamples=1)"
|
301 |
+
]
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"cell_type": "markdown",
|
305 |
+
"metadata": {},
|
306 |
+
"source": [
|
307 |
+
"## es"
|
308 |
+
]
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"cell_type": "code",
|
312 |
+
"execution_count": null,
|
313 |
+
"metadata": {},
|
314 |
+
"outputs": [],
|
315 |
+
"source": [
|
316 |
+
"text = \"\"\"El foco de este documento son las descomposicións de (k, `)-sparse gráficos en bordes-disjunto subgraphs\n",
|
317 |
+
"que certifique la escasez. Usamos el gráfico para significar un múltiplo, posiblemente con bucles. Nosotros decimos que un\n",
|
318 |
+
"grafo es (k, `)-sparse si ningún subconjunto de n′ vértices abarca más de kn ` bordes en el gráfico; a\n",
|
319 |
+
"(k, `)-sparse gráfico con kn ` bordes es (k, `)-estrechado. Llamamos al rango k ≤ 2k−1 el superior\n",
|
320 |
+
"rango de gráficos escasos y 0≤ k el rango inferior.\n",
|
321 |
+
"En este artículo, presentamos algoritmos eficientes para encontrar descomposicións que certifiquen la escasez\n",
|
322 |
+
"en el rango superior de `. Nuestros algoritmos también se aplican en el rango inferior, que ya era ad-\n",
|
323 |
+
"vestido por [3, 4, 5, 6, 19]. Una descomposición certifica la escasez de un gráfico si los gráficos dispersos\n",
|
324 |
+
"y los gráficos que admiten la descomposición coinciden.\n",
|
325 |
+
"Nuestros algoritmos se basan en una nueva caracterización de gráficos escasos, que llamamos el\n",
|
326 |
+
"juego de guijarros con colores. El juego de guijarros con colores es una regla de construcción de gráficos simples que\n",
|
327 |
+
"produce un gráfico escaso junto con una descomposición certificadora de la escasez.\n",
|
328 |
+
"Definimos y estudiamos una clase canónica de construcciones de juego de guijarros, que corresponden a\n",
|
329 |
+
"previamente estudiado las descomposiciones de los gráficos escasos en los árboles disjuntos del borde. Nuestros resultados proporcionan\n",
|
330 |
+
"un marco unificador para todos los casos especiales conocidos anteriormente, incluidos Nash-Williams-\n",
|
331 |
+
"Tutte y [7, 24]. De hecho, en el rango inferior, las construcciones canónicas de juego de guijarros capturan la\n",
|
332 |
+
"propiedades de las rutas de aumento utilizadas en los algoritmos de unión de matroides y de intersección[5, 6].\n",
|
333 |
+
"Dado que los gráficos escasos en el rango superior no se sabe que son uniones o intersecciones de la\n",
|
334 |
+
"matroides para los que hay algoritmos de ruta de aumento eficiente, estos no se aplican fácilmente en\n",
|
335 |
+
"* Investigación de ambos autores financiada por la NSF bajo subvenciones NSF CCF-0430990 y NSF-DARPA CARGO\n",
|
336 |
+
"CCR-0310661 al primer autor.\n",
|
337 |
+
"2 Ileana Streinu, Louis Theran\n",
|
338 |
+
"Significado del término\n",
|
339 |
+
"Gráfico escaso G Cada subgrafo no vacío en n′ vértices tiene ≤ kn ` bordes\n",
|
340 |
+
"El gráfico ajustado G G = (V,E) es escaso y V = n, E= kn− `\n",
|
341 |
+
"El bloque H en G G es escaso, y H es un subgrafo apretado\n",
|
342 |
+
"El componente H de G G es escaso y H es un bloqueo máximo\n",
|
343 |
+
"Gráfico cartográfico que admite una orientación de grado-exactamente-uno\n",
|
344 |
+
"(k, `)-maps-and-trees Edge-disjunt union de ` árboles y (k- `) map-grpahs\n",
|
345 |
+
"`Tk Unión de ` árboles, cada vértice está exactamente en k de ellos\n",
|
346 |
+
"Conjunto de piezas arbóreas de un `Tk inducido en V ′ ́V Piezas de árboles en el `Tk extendido por E(V ′)\n",
|
347 |
+
"`Tk Apropiado Cada V ′ V contiene ≥ ` pedazos de árboles de la `Tk\n",
|
348 |
+
"Cuadro 1 Gráfico escaso y terminología de descomposición utilizada en este artículo.\n",
|
349 |
+
"el rango superior. Pebble juego con construcciones de colores por lo tanto puede ser considerado un fortalecimiento\n",
|
350 |
+
"de caminos de aumento a la gama superior de gráficos de la escasez matroidal.\n",
|
351 |
+
"1.1. Gráficos escasos\n",
|
352 |
+
"Un gráfico es (k, `)-sparse si para cualquier subgrafo no vacío con bordes m′ y n′ vértices, m′ ≤\n",
|
353 |
+
"kn `. Observamos que esta condición implica que 0 ≤ ` ≤ 2k− 1, y a partir de ahora en este\n",
|
354 |
+
"Haremos esta suposición. Un gráfico escaso que tiene n vértices y exactamente bordes kn\n",
|
355 |
+
"se llama apretado.\n",
|
356 |
+
"Para un gráfico G = (V,E), y V ′ V, utilizamos el intervalo de notación (V ′) para el número de bordes\n",
|
357 |
+
"en el subgráfico inducido por V ′. En un gráfico dirigido, out(V ′) es el número de bordes con la cola\n",
|
358 |
+
"en V ′ y la cabeza en V −V ′; para un subgráfico inducido por V ′, llamamos a tal borde un borde superior.\n",
|
359 |
+
"Hay dos tipos importantes de subgrafías de gráficos escasos. Un bloque es un subgrafo apretado de\n",
|
360 |
+
"un gráfico escaso. Un componente es un bloque máximo.\n",
|
361 |
+
"La Tabla 1 resume la escasa terminología gráfica utilizada en este artículo.\n",
|
362 |
+
"1.2. Descomposiciónes de certificación de la sparsidad\n",
|
363 |
+
"Un k-arborescencia es un gráfico que admite una descomposición en k borde-desjunto que abarca los árboles.\n",
|
364 |
+
"La Figura 1(a) muestra un ejemplo de una 3-arborescencia. Se describen los gráficos k-arborescentes\n",
|
365 |
+
"por los conocidos teoremas de Tutte [23] y Nash-Williams [17] como exactamente el (k,k) apretado\n",
|
366 |
+
"gráficos.\n",
|
367 |
+
"ABSTRACT\n",
|
368 |
+
"\"\"\""
|
369 |
+
]
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"cell_type": "code",
|
373 |
+
"execution_count": null,
|
374 |
+
"metadata": {},
|
375 |
+
"outputs": [],
|
376 |
+
"source": [
|
377 |
+
"gpt2.generate(sess,prefix=text,truncate=sufix,checkpoint_dir=checkpoint_dir,nsamples=1)"
|
378 |
+
]
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"cell_type": "markdown",
|
382 |
+
"metadata": {},
|
383 |
+
"source": [
|
384 |
+
"# gradio interface"
|
385 |
+
]
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"cell_type": "code",
|
389 |
+
"execution_count": null,
|
390 |
+
"metadata": {},
|
391 |
+
"outputs": [],
|
392 |
+
"source": [
|
393 |
+
import gradio as gr  # was missing: the original cell referenced `gr` without any import


def generateAbstract(text):
    """Generate an abstract for *text* with the fine-tuned GPT-2 model.

    Rebuilds the TF graph and session on every call — slow, but it avoids
    variable-reuse errors when Gradio invokes the handler repeatedly
    (see the commented-out variable_scope attempts in the original cell).
    Returns everything from the 'ABSTRACT' marker onward.

    NOTE(review): relies on notebook globals `gpt2`, `tf`,
    `checkpoint_dir` and `sufix` defined in earlier cells.
    """
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
    txt = gpt2.generate(sess,
                        prefix=str(text) + "\nABSTRACT",
                        return_as_list=True,
                        truncate=sufix,
                        checkpoint_dir=checkpoint_dir,
                        nsamples=1)[0]
    return str(txt[txt.find('ABSTRACT'):])


iface = gr.Interface(fn=generateAbstract,
                     inputs=gr.inputs.Textbox(lines=10, placeholder="text"),
                     outputs="textbox")
iface.launch(debug=True)
|
406 |
+
]
|
407 |
+
}
|
408 |
+
],
|
409 |
+
"metadata": {
|
410 |
+
"interpreter": {
|
411 |
+
"hash": "53fbdc69e3e12c371950068c144423682c30d04ec68c2bd46937202e33e0058d"
|
412 |
+
},
|
413 |
+
"kernelspec": {
|
414 |
+
"display_name": "Python 3.7.11 ('receta')",
|
415 |
+
"language": "python",
|
416 |
+
"name": "python3"
|
417 |
+
},
|
418 |
+
"language_info": {
|
419 |
+
"codemirror_mode": {
|
420 |
+
"name": "ipython",
|
421 |
+
"version": 3
|
422 |
+
},
|
423 |
+
"file_extension": ".py",
|
424 |
+
"mimetype": "text/x-python",
|
425 |
+
"name": "python",
|
426 |
+
"nbconvert_exporter": "python",
|
427 |
+
"pygments_lexer": "ipython3",
|
428 |
+
"version": "3.9.7"
|
429 |
+
},
|
430 |
+
"orig_nbformat": 4
|
431 |
+
},
|
432 |
+
"nbformat": 4,
|
433 |
+
"nbformat_minor": 2
|
434 |
+
}
|
AbstractGenerator/TrainigData/en.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:814f983aa49ccc33a993a7d12f67a2eb2a7ca0b15d8697e82b50d3a19f3e1595
|
3 |
+
size 35400974
|
AbstractGenerator/TrainigData/es.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2454067cfe384e1d824b3f5d29cb5c4e1ff292289ad4b37c6cbd22f5cc715295
|
3 |
+
size 44460970
|
AbstractGenerator/weights/run1/encoder.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783
|
3 |
+
size 1042301
|
AbstractGenerator/weights/run1/events.out.tfevents.1648184225.FRANZ96521-W11
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83a88ba7f3268f11289fb24fd13db1367b91acce6466c4ad394011e10ea4c304
|
3 |
+
size 82
|
AbstractGenerator/weights/run1/events.out.tfevents.1648184499.FRANZ96521-W11
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb8646e6bf1e1b8cc26f8128ec4e4c2e797dac297939450a8bf46057e7388a6a
|
3 |
+
size 82
|
AbstractGenerator/weights/run1/events.out.tfevents.1648229481.FRANZ96521-W11
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04d1f71db542da83fee4fe8574bf382cb5324b6decef506206250b8fea85abd0
|
3 |
+
size 82
|
AbstractGenerator/weights/run1/hparams.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9d56e4121c427164e0c55c6f03c08e1daf9002b9b672825112d19097b680318
|
3 |
+
size 90
|
AbstractGenerator/weights/run1/vocab.bpe
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ce1664773c50f3e0cc8842619a93edc4624525b728b188a9e0be33b7726adc5
|
3 |
+
size 456318
|
Descarga.ipynb
ADDED
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import json\n",
|
11 |
+
"from pandas import json_normalize\n",
|
12 |
+
"import requests\n",
|
13 |
+
"from pathlib import Path\n",
|
14 |
+
"from multiprocessing.pool import ThreadPool as Pool\n",
|
15 |
+
"import codecs\n",
|
16 |
+
"import random\n",
|
17 |
+
"import re"
|
18 |
+
]
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"cell_type": "code",
|
22 |
+
"execution_count": 3,
|
23 |
+
"metadata": {},
|
24 |
+
"outputs": [],
|
25 |
+
"source": [
|
26 |
+
"URL_BASE = \"https://arxiv.org/pdf/\"\n",
|
27 |
+
"PDF_PATH = 'PDF'\n",
|
28 |
+
"TXT_PATH= 'TXT'"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "markdown",
|
33 |
+
"metadata": {},
|
34 |
+
"source": [
|
35 |
+
"# Arxiv\n"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": 4,
|
41 |
+
"metadata": {},
|
42 |
+
"outputs": [
|
43 |
+
{
|
44 |
+
"name": "stdout",
|
45 |
+
"output_type": "stream",
|
46 |
+
"text": [
|
47 |
+
"<class 'pandas.core.frame.DataFrame'>\n"
|
48 |
+
]
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"data": {
|
52 |
+
"text/html": [
|
53 |
+
"<div>\n",
|
54 |
+
"<style scoped>\n",
|
55 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
56 |
+
" vertical-align: middle;\n",
|
57 |
+
" }\n",
|
58 |
+
"\n",
|
59 |
+
" .dataframe tbody tr th {\n",
|
60 |
+
" vertical-align: top;\n",
|
61 |
+
" }\n",
|
62 |
+
"\n",
|
63 |
+
" .dataframe thead th {\n",
|
64 |
+
" text-align: right;\n",
|
65 |
+
" }\n",
|
66 |
+
"</style>\n",
|
67 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
68 |
+
" <thead>\n",
|
69 |
+
" <tr style=\"text-align: right;\">\n",
|
70 |
+
" <th></th>\n",
|
71 |
+
" <th>id</th>\n",
|
72 |
+
" <th>title</th>\n",
|
73 |
+
" <th>abstract</th>\n",
|
74 |
+
" <th>Text</th>\n",
|
75 |
+
" </tr>\n",
|
76 |
+
" </thead>\n",
|
77 |
+
" <tbody>\n",
|
78 |
+
" <tr>\n",
|
79 |
+
" <th>0</th>\n",
|
80 |
+
" <td>0704.0001</td>\n",
|
81 |
+
" <td>Calculation of prompt diphoton production cros...</td>\n",
|
82 |
+
" <td>A fully differential calculation in perturba...</td>\n",
|
83 |
+
" <td></td>\n",
|
84 |
+
" </tr>\n",
|
85 |
+
" <tr>\n",
|
86 |
+
" <th>1</th>\n",
|
87 |
+
" <td>0704.0002</td>\n",
|
88 |
+
" <td>Sparsity-certifying Graph Decompositions</td>\n",
|
89 |
+
" <td>We describe a new algorithm, the $(k,\\ell)$-...</td>\n",
|
90 |
+
" <td></td>\n",
|
91 |
+
" </tr>\n",
|
92 |
+
" <tr>\n",
|
93 |
+
" <th>2</th>\n",
|
94 |
+
" <td>0704.0003</td>\n",
|
95 |
+
" <td>The evolution of the Earth-Moon system based o...</td>\n",
|
96 |
+
" <td>The evolution of Earth-Moon system is descri...</td>\n",
|
97 |
+
" <td></td>\n",
|
98 |
+
" </tr>\n",
|
99 |
+
" <tr>\n",
|
100 |
+
" <th>3</th>\n",
|
101 |
+
" <td>0704.0004</td>\n",
|
102 |
+
" <td>A determinant of Stirling cycle numbers counts...</td>\n",
|
103 |
+
" <td>We show that a determinant of Stirling cycle...</td>\n",
|
104 |
+
" <td></td>\n",
|
105 |
+
" </tr>\n",
|
106 |
+
" <tr>\n",
|
107 |
+
" <th>4</th>\n",
|
108 |
+
" <td>0704.0005</td>\n",
|
109 |
+
" <td>From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a...</td>\n",
|
110 |
+
" <td>In this paper we show how to compute the $\\L...</td>\n",
|
111 |
+
" <td></td>\n",
|
112 |
+
" </tr>\n",
|
113 |
+
" <tr>\n",
|
114 |
+
" <th>...</th>\n",
|
115 |
+
" <td>...</td>\n",
|
116 |
+
" <td>...</td>\n",
|
117 |
+
" <td>...</td>\n",
|
118 |
+
" <td>...</td>\n",
|
119 |
+
" </tr>\n",
|
120 |
+
" <tr>\n",
|
121 |
+
" <th>1996</th>\n",
|
122 |
+
" <td>0704.1997</td>\n",
|
123 |
+
" <td>Query on Negative Temperature, Internal Intera...</td>\n",
|
124 |
+
" <td>After negative temperature is restated, we f...</td>\n",
|
125 |
+
" <td></td>\n",
|
126 |
+
" </tr>\n",
|
127 |
+
" <tr>\n",
|
128 |
+
" <th>1997</th>\n",
|
129 |
+
" <td>0704.1998</td>\n",
|
130 |
+
" <td>Absence of the Fifth Force Problem in a Model ...</td>\n",
|
131 |
+
" <td>A scale invariant model containing dilaton $...</td>\n",
|
132 |
+
" <td></td>\n",
|
133 |
+
" </tr>\n",
|
134 |
+
" <tr>\n",
|
135 |
+
" <th>1998</th>\n",
|
136 |
+
" <td>0704.1999</td>\n",
|
137 |
+
" <td>Dark matter caustics and the enhancement of se...</td>\n",
|
138 |
+
" <td>Cold dark matter haloes are populated by cau...</td>\n",
|
139 |
+
" <td></td>\n",
|
140 |
+
" </tr>\n",
|
141 |
+
" <tr>\n",
|
142 |
+
" <th>1999</th>\n",
|
143 |
+
" <td>0704.2000</td>\n",
|
144 |
+
" <td>Search for a Higgs boson produced in associati...</td>\n",
|
145 |
+
" <td>We describe a search for the standard model ...</td>\n",
|
146 |
+
" <td></td>\n",
|
147 |
+
" </tr>\n",
|
148 |
+
" <tr>\n",
|
149 |
+
" <th>2000</th>\n",
|
150 |
+
" <td>0704.2001</td>\n",
|
151 |
+
" <td>Geometry of Parallelizable Manifolds in the Co...</td>\n",
|
152 |
+
" <td>In this paper, we deal with a generalization...</td>\n",
|
153 |
+
" <td></td>\n",
|
154 |
+
" </tr>\n",
|
155 |
+
" </tbody>\n",
|
156 |
+
"</table>\n",
|
157 |
+
"<p>2001 rows × 4 columns</p>\n",
|
158 |
+
"</div>"
|
159 |
+
],
|
160 |
+
"text/plain": [
|
161 |
+
" id title \\\n",
|
162 |
+
"0 0704.0001 Calculation of prompt diphoton production cros... \n",
|
163 |
+
"1 0704.0002 Sparsity-certifying Graph Decompositions \n",
|
164 |
+
"2 0704.0003 The evolution of the Earth-Moon system based o... \n",
|
165 |
+
"3 0704.0004 A determinant of Stirling cycle numbers counts... \n",
|
166 |
+
"4 0704.0005 From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a... \n",
|
167 |
+
"... ... ... \n",
|
168 |
+
"1996 0704.1997 Query on Negative Temperature, Internal Intera... \n",
|
169 |
+
"1997 0704.1998 Absence of the Fifth Force Problem in a Model ... \n",
|
170 |
+
"1998 0704.1999 Dark matter caustics and the enhancement of se... \n",
|
171 |
+
"1999 0704.2000 Search for a Higgs boson produced in associati... \n",
|
172 |
+
"2000 0704.2001 Geometry of Parallelizable Manifolds in the Co... \n",
|
173 |
+
"\n",
|
174 |
+
" abstract Text \n",
|
175 |
+
"0 A fully differential calculation in perturba... \n",
|
176 |
+
"1 We describe a new algorithm, the $(k,\\ell)$-... \n",
|
177 |
+
"2 The evolution of Earth-Moon system is descri... \n",
|
178 |
+
"3 We show that a determinant of Stirling cycle... \n",
|
179 |
+
"4 In this paper we show how to compute the $\\L... \n",
|
180 |
+
"... ... ... \n",
|
181 |
+
"1996 After negative temperature is restated, we f... \n",
|
182 |
+
"1997 A scale invariant model containing dilaton $... \n",
|
183 |
+
"1998 Cold dark matter haloes are populated by cau... \n",
|
184 |
+
"1999 We describe a search for the standard model ... \n",
|
185 |
+
"2000 In this paper, we deal with a generalization... \n",
|
186 |
+
"\n",
|
187 |
+
"[2001 rows x 4 columns]"
|
188 |
+
]
|
189 |
+
},
|
190 |
+
"execution_count": 4,
|
191 |
+
"metadata": {},
|
192 |
+
"output_type": "execute_result"
|
193 |
+
}
|
194 |
+
],
|
195 |
+
"source": [
|
196 |
+
"data = pd.read_json('ARxiv/arxiv-metadata-oai-snapshot.json',lines=True, chunksize=2001,dtype={'id':'str'})\n",
|
197 |
+
"df = None\n",
|
198 |
+
"for i in data:\n",
|
199 |
+
" df = i \n",
|
200 |
+
" print(type(i))\n",
|
201 |
+
" break\n",
|
202 |
+
"df = df[['id','title','abstract']]\n",
|
203 |
+
"df.insert(3, \"Text\", \"\") \n",
|
204 |
+
"df"
|
205 |
+
]
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"cell_type": "code",
|
209 |
+
"execution_count": 7,
|
210 |
+
"metadata": {},
|
211 |
+
"outputs": [],
|
212 |
+
"source": [
|
213 |
+
"def GetFileURL(file_id):\n",
|
214 |
+
" url = URL_BASE+file_id\n",
|
215 |
+
" r = requests.get(url, stream=True) \n",
|
216 |
+
" filename = Path(PDF_PATH+'/'+file_id+'.pdf')\n",
|
217 |
+
" response = requests.get(url)\n",
|
218 |
+
" filename.write_bytes(response.content)"
|
219 |
+
]
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"cell_type": "code",
|
223 |
+
"execution_count": 35,
|
224 |
+
"metadata": {},
|
225 |
+
"outputs": [],
|
226 |
+
"source": [
|
227 |
+
"pool_size = 16 \n",
|
228 |
+
"def worker(file):\n",
|
229 |
+
" try:\n",
|
230 |
+
" GetFileURL(file)\n",
|
231 |
+
" except:\n",
|
232 |
+
" print('error with item '+ file)\n",
|
233 |
+
" try:\n",
|
234 |
+
" with codecs.open(PDF_PATH+'/log.txt', 'a') as the_file: \n",
|
235 |
+
" the_file.writelines(str(file)+\"\\n\")\n",
|
236 |
+
" except:\n",
|
237 |
+
" print('error en log '+ file)\n",
|
238 |
+
"def get_ids(iteracion,batch=100): \n",
|
239 |
+
" inicio = int(iteracion*batch)\n",
|
240 |
+
" filesId = data[inicio :inicio + batch]['id']\n",
|
241 |
+
" return filesId\n",
|
242 |
+
"\n",
|
243 |
+
"pool = Pool(pool_size)\n",
|
244 |
+
"filesId = get_ids(19)\n",
|
245 |
+
"for file in filesId:\n",
|
246 |
+
" pool.apply_async(worker, (file,))\n",
|
247 |
+
"\n",
|
248 |
+
"pool.close()\n",
|
249 |
+
"pool.join()"
|
250 |
+
]
|
251 |
+
}
|
252 |
+
],
|
253 |
+
"metadata": {
|
254 |
+
"interpreter": {
|
255 |
+
"hash": "3f7e9d73c32ad96f75174922c475a50b168aad887cbaa14717912a88f31d3802"
|
256 |
+
},
|
257 |
+
"kernelspec": {
|
258 |
+
"display_name": "Python 3.9.7 ('tf-gpu')",
|
259 |
+
"language": "python",
|
260 |
+
"name": "python3"
|
261 |
+
},
|
262 |
+
"language_info": {
|
263 |
+
"codemirror_mode": {
|
264 |
+
"name": "ipython",
|
265 |
+
"version": 3
|
266 |
+
},
|
267 |
+
"file_extension": ".py",
|
268 |
+
"mimetype": "text/x-python",
|
269 |
+
"name": "python",
|
270 |
+
"nbconvert_exporter": "python",
|
271 |
+
"pygments_lexer": "ipython3",
|
272 |
+
"version": "3.9.7"
|
273 |
+
},
|
274 |
+
"orig_nbformat": 4
|
275 |
+
},
|
276 |
+
"nbformat": 4,
|
277 |
+
"nbformat_minor": 2
|
278 |
+
}
|
PDF_a_TXT.ipynb
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 3,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"Requirement already satisfied: PyPDF2 in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (1.26.0)\n",
|
13 |
+
"Requirement already satisfied: tika in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (1.24)\n",
|
14 |
+
"Requirement already satisfied: requests in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (from tika) (2.27.1)\n",
|
15 |
+
"Requirement already satisfied: setuptools in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (from tika) (58.0.4)\n",
|
16 |
+
"Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (from requests->tika) (2.0.4)\n",
|
17 |
+
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (from requests->tika) (2021.10.8)\n",
|
18 |
+
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (from requests->tika) (3.3)\n",
|
19 |
+
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\franz\\.conda\\envs\\tensorflow\\lib\\site-packages (from requests->tika) (1.26.8)\n"
|
20 |
+
]
|
21 |
+
}
|
22 |
+
],
|
23 |
+
"source": [
|
24 |
+
"! pip install PyPDF2\n",
|
25 |
+
"! pip install tika"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cell_type": "code",
|
30 |
+
"execution_count": 1,
|
31 |
+
"metadata": {},
|
32 |
+
"outputs": [],
|
33 |
+
"source": [
|
34 |
+
"from tika import parser\n",
|
35 |
+
"import codecs\n",
|
36 |
+
"import os"
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 2,
|
42 |
+
"metadata": {},
|
43 |
+
"outputs": [],
|
44 |
+
"source": [
|
45 |
+
"def obtener_texto(file_path,store_path):\n",
|
46 |
+
" file_data = parser.from_file(file_path)\n",
|
47 |
+
" output = file_data['content']\n",
|
48 |
+
" output = output.strip() \n",
|
49 |
+
" output= output.split('\\n')\n",
|
50 |
+
" with codecs.open(store_path+'.txt', 'w','utf-8') as the_file: \n",
|
51 |
+
" for line in output:\n",
|
52 |
+
" #print(line)\n",
|
53 |
+
" if len(line)>4: \n",
|
54 |
+
" the_file.write(str(line)+'\\n')\n"
|
55 |
+
]
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"cell_type": "code",
|
59 |
+
"execution_count": 3,
|
60 |
+
"metadata": {},
|
61 |
+
"outputs": [
|
62 |
+
{
|
63 |
+
"name": "stderr",
|
64 |
+
"output_type": "stream",
|
65 |
+
"text": [
|
66 |
+
"2022-03-17 17:02:20,018 [MainThread ] [WARNI] Failed to see startup log message; retrying...\n"
|
67 |
+
]
|
68 |
+
}
|
69 |
+
],
|
70 |
+
"source": [
|
71 |
+
"PDF_PATH = 'PDF'\n",
|
72 |
+
"TXT_PATH= 'TXT'\n",
|
73 |
+
"files = os.listdir(PDF_PATH)\n",
|
74 |
+
"for file in files:\n",
|
75 |
+
" obtener_texto(PDF_PATH+'/'+file,TXT_PATH+'/'+file)\n",
|
76 |
+
" "
|
77 |
+
]
|
78 |
+
}
|
79 |
+
],
|
80 |
+
"metadata": {
|
81 |
+
"interpreter": {
|
82 |
+
"hash": "3f7e9d73c32ad96f75174922c475a50b168aad887cbaa14717912a88f31d3802"
|
83 |
+
},
|
84 |
+
"kernelspec": {
|
85 |
+
"display_name": "Python 3.9.7 ('tf-gpu')",
|
86 |
+
"language": "python",
|
87 |
+
"name": "python3"
|
88 |
+
},
|
89 |
+
"language_info": {
|
90 |
+
"codemirror_mode": {
|
91 |
+
"name": "ipython",
|
92 |
+
"version": 3
|
93 |
+
},
|
94 |
+
"file_extension": ".py",
|
95 |
+
"mimetype": "text/x-python",
|
96 |
+
"name": "python",
|
97 |
+
"nbconvert_exporter": "python",
|
98 |
+
"pygments_lexer": "ipython3",
|
99 |
+
"version": "3.9.7"
|
100 |
+
},
|
101 |
+
"orig_nbformat": 4
|
102 |
+
},
|
103 |
+
"nbformat": 4,
|
104 |
+
"nbformat_minor": 2
|
105 |
+
}
|
models/124M/checkpoint
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd1b025d2e155283f5e300ce95bf6d5b6bc0f7fe010db73daa6975eb896ab9cb
|
3 |
+
size 77
|
models/124M/encoder.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783
|
3 |
+
size 1042301
|
models/124M/hparams.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9d56e4121c427164e0c55c6f03c08e1daf9002b9b672825112d19097b680318
|
3 |
+
size 90
|
models/124M/model.ckpt.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2060c885360cc0cf41d7a6dbc4d24b5127aae20260c8b5ae521b5a6578407118
|
3 |
+
size 497759232
|
models/124M/model.ckpt.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71916f763f9746f9b2a06b12d91996cf1084ae008d0424543d39391c5f2dc687
|
3 |
+
size 5215
|
models/124M/model.ckpt.meta
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4668c448fa11531fd6700460487f73e82d3272960cea942252f8744bf225c77b
|
3 |
+
size 471155
|
models/124M/vocab.bpe
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ce1664773c50f3e0cc8842619a93edc4624525b728b188a9e0be33b7726adc5
|
3 |
+
size 456318
|
txt_to_csv.ipynb
ADDED
@@ -0,0 +1,662 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import os\n",
|
11 |
+
"from easynmt import EasyNMT\n"
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": null,
|
17 |
+
"metadata": {},
|
18 |
+
"outputs": [],
|
19 |
+
"source": [
|
20 |
+
"URL_BASE = \"https://arxiv.org/pdf/\"\n",
|
21 |
+
"PDF_PATH = 'PDF'\n",
|
22 |
+
"TXT_PATH= 'TXT'\n",
|
23 |
+
"CSV_PATH = 'CSV'"
|
24 |
+
]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"cell_type": "markdown",
|
28 |
+
"metadata": {},
|
29 |
+
"source": [
|
30 |
+
"# Get Data from TXT"
|
31 |
+
]
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"cell_type": "code",
|
35 |
+
"execution_count": null,
|
36 |
+
"metadata": {},
|
37 |
+
"outputs": [],
|
38 |
+
"source": [
|
39 |
+
"data = pd.read_json('ARxiv/arxiv-metadata-oai-snapshot.json',lines=True, chunksize=2001,dtype={'id':'str'})\n",
|
40 |
+
"df = None\n",
|
41 |
+
"for i in data:\n",
|
42 |
+
" df = i \n",
|
43 |
+
" print(type(i))\n",
|
44 |
+
" break\n",
|
45 |
+
"df = df[['id','title','abstract']]\n"
|
46 |
+
]
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"cell_type": "code",
|
50 |
+
"execution_count": null,
|
51 |
+
"metadata": {},
|
52 |
+
"outputs": [],
|
53 |
+
"source": [
|
54 |
+
"for file in df['id']:\n",
|
55 |
+
" file_path = TXT_PATH+'/'+str(file)+'.pdf.txt'\n",
|
56 |
+
" if os.path.isfile(file_path):\n",
|
57 |
+
" with open(file_path,'r',encoding='utf8') as f:\n",
|
58 |
+
" s =str( f.read()) \n",
|
59 |
+
" df.loc[df['id'] == str(file),'full_text'] = s "
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"cell_type": "code",
|
64 |
+
"execution_count": null,
|
65 |
+
"metadata": {},
|
66 |
+
"outputs": [],
|
67 |
+
"source": [
|
68 |
+
"df = df.dropna()\n",
|
69 |
+
"df.reset_index()\n",
|
70 |
+
"df.to_csv(CSV_PATH+'/scientific_paper_en.csv',index=False,encoding='utf-8')\n",
|
71 |
+
"df"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "markdown",
|
76 |
+
"metadata": {},
|
77 |
+
"source": [
|
78 |
+
"# first run \n"
|
79 |
+
]
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"cell_type": "code",
|
83 |
+
"execution_count": null,
|
84 |
+
"metadata": {},
|
85 |
+
"outputs": [],
|
86 |
+
"source": [
|
87 |
+
"df = pd.read_csv(CSV_PATH +'/scientific_paper_en.csv',dtype={'id':'str'})\n",
|
88 |
+
"df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')"
|
89 |
+
]
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"cell_type": "markdown",
|
93 |
+
"metadata": {},
|
94 |
+
"source": [
|
95 |
+
"# leer datos"
|
96 |
+
]
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"cell_type": "code",
|
100 |
+
"execution_count": null,
|
101 |
+
"metadata": {},
|
102 |
+
"outputs": [],
|
103 |
+
"source": [
|
104 |
+
"df = pd.read_csv(CSV_PATH +'/scientific_paper_full_text_translated.csv',dtype={'id':'str'})\n",
|
105 |
+
"print(len(df.index))\n",
|
106 |
+
"df"
|
107 |
+
]
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"cell_type": "markdown",
|
111 |
+
"metadata": {},
|
112 |
+
"source": [
|
113 |
+
"# translate"
|
114 |
+
]
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"cell_type": "code",
|
118 |
+
"execution_count": null,
|
119 |
+
"metadata": {},
|
120 |
+
"outputs": [],
|
121 |
+
"source": [
|
122 |
+
"model = EasyNMT('opus-mt')"
|
123 |
+
]
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"cell_type": "markdown",
|
127 |
+
"metadata": {},
|
128 |
+
"source": [
|
129 |
+
"## translate full text"
|
130 |
+
]
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"cell_type": "code",
|
134 |
+
"execution_count": null,
|
135 |
+
"metadata": {},
|
136 |
+
"outputs": [],
|
137 |
+
"source": [
|
138 |
+
"max = len(df.index)\n",
|
139 |
+
"for i in range(0,1754):\n",
|
140 |
+
" text = df.iloc[i]['full_text']\n",
|
141 |
+
" translated_text = model.translate(text, target_lang='es')\n",
|
142 |
+
" df.loc[i,'translated'] = translated_text \n",
|
143 |
+
" print(\"listo documento \",i)\n",
|
144 |
+
" if(i%10==0):\n",
|
145 |
+
" df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')\n",
|
146 |
+
"df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')"
|
147 |
+
]
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"cell_type": "markdown",
|
151 |
+
"metadata": {},
|
152 |
+
"source": [
|
153 |
+
"## translate abstract"
|
154 |
+
]
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"cell_type": "code",
|
158 |
+
"execution_count": null,
|
159 |
+
"metadata": {},
|
160 |
+
"outputs": [],
|
161 |
+
"source": [
|
162 |
+
"max = len(df.index)\n",
|
163 |
+
"for i in range(0,1754):\n",
|
164 |
+
" text = df.iloc[i]['abstract']\n",
|
165 |
+
" translated_text = model.translate(text, target_lang='es')\n",
|
166 |
+
" df.loc[i,'translated_abstract'] = translated_text \n",
|
167 |
+
" print(\"listo documento \",i)\n",
|
168 |
+
" if(i%100==0):\n",
|
169 |
+
" df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')\n",
|
170 |
+
"df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')\n",
|
171 |
+
"\n"
|
172 |
+
]
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"cell_type": "markdown",
|
176 |
+
"metadata": {},
|
177 |
+
"source": [
|
178 |
+
"# remove abstract"
|
179 |
+
]
|
180 |
+
},
|
181 |
+
{
|
182 |
+
"cell_type": "code",
|
183 |
+
"execution_count": null,
|
184 |
+
"metadata": {},
|
185 |
+
"outputs": [],
|
186 |
+
"source": [
|
187 |
+
"max = len(df.index)-1"
|
188 |
+
]
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"cell_type": "code",
|
192 |
+
"execution_count": null,
|
193 |
+
"metadata": {},
|
194 |
+
"outputs": [],
|
195 |
+
"source": [
|
196 |
+
"end = 'Introducción'\n",
|
197 |
+
"for i in range(0,max):\n",
|
198 |
+
" text = df.iloc[i]['translated'] \n",
|
199 |
+
" p = text.find(end)\n",
|
200 |
+
" if(p != -1): \n",
|
201 |
+
" df.loc[i,'translated_no_abstract'] = text[p:] \n",
|
202 |
+
" else:\n",
|
203 |
+
" df.loc[i,'translated_no_abstract']= text\n",
|
204 |
+
" print(\"listo documento \",i,p)\n",
|
205 |
+
" if(i%1000==0):\n",
|
206 |
+
" df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')\n",
|
207 |
+
"df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')"
|
208 |
+
]
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"cell_type": "code",
|
212 |
+
"execution_count": null,
|
213 |
+
"metadata": {},
|
214 |
+
"outputs": [],
|
215 |
+
"source": [
|
216 |
+
"end = 'Abstract'\n",
|
217 |
+
"for i in range(0,max):\n",
|
218 |
+
" text = df.iloc[i]['full_text'] \n",
|
219 |
+
" p = text.find(end)\n",
|
220 |
+
" if(p != -1): \n",
|
221 |
+
" df.loc[i,'text_no_abstract'] = text[p:] \n",
|
222 |
+
" else:\n",
|
223 |
+
" df.loc[i,'text_no_abstract']= text \n",
|
224 |
+
" if(i%1000==0):\n",
|
225 |
+
" df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')\n",
|
226 |
+
"df.to_csv(CSV_PATH+'/scientific_paper_full_text_translated.csv',index=False,encoding='utf-8')"
|
227 |
+
]
|
228 |
+
},
|
229 |
+
{
|
230 |
+
"cell_type": "markdown",
|
231 |
+
"metadata": {},
|
232 |
+
"source": [
|
233 |
+
"# split data to csv"
|
234 |
+
]
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"cell_type": "code",
|
238 |
+
"execution_count": null,
|
239 |
+
"metadata": {},
|
240 |
+
"outputs": [],
|
241 |
+
"source": [
|
242 |
+
"df = pd.read_csv(CSV_PATH +'/scientific_paper_full_text_translated.csv',dtype={'id':'str'})\n",
|
243 |
+
"df"
|
244 |
+
]
|
245 |
+
},
|
246 |
+
{
|
247 |
+
"cell_type": "code",
|
248 |
+
"execution_count": 77,
|
249 |
+
"metadata": {},
|
250 |
+
"outputs": [
|
251 |
+
{
|
252 |
+
"data": {
|
253 |
+
"text/html": [
|
254 |
+
"<div>\n",
|
255 |
+
"<style scoped>\n",
|
256 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
257 |
+
" vertical-align: middle;\n",
|
258 |
+
" }\n",
|
259 |
+
"\n",
|
260 |
+
" .dataframe tbody tr th {\n",
|
261 |
+
" vertical-align: top;\n",
|
262 |
+
" }\n",
|
263 |
+
"\n",
|
264 |
+
" .dataframe thead th {\n",
|
265 |
+
" text-align: right;\n",
|
266 |
+
" }\n",
|
267 |
+
"</style>\n",
|
268 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
269 |
+
" <thead>\n",
|
270 |
+
" <tr style=\"text-align: right;\">\n",
|
271 |
+
" <th></th>\n",
|
272 |
+
" <th>id</th>\n",
|
273 |
+
" <th>title</th>\n",
|
274 |
+
" <th>full_text</th>\n",
|
275 |
+
" <th>abstract</th>\n",
|
276 |
+
" <th>text_no_abstract</th>\n",
|
277 |
+
" </tr>\n",
|
278 |
+
" </thead>\n",
|
279 |
+
" <tbody>\n",
|
280 |
+
" <tr>\n",
|
281 |
+
" <th>0</th>\n",
|
282 |
+
" <td>0704.0002</td>\n",
|
283 |
+
" <td>Sparsity-certifying Graph Decompositions</td>\n",
|
284 |
+
" <td>Descomposiciones del gráfico de certificación ...</td>\n",
|
285 |
+
" <td>Describimos un nuevo algoritmo, el juego de ...</td>\n",
|
286 |
+
" <td>Introducción y preliminares\\nEl foco de este d...</td>\n",
|
287 |
+
" </tr>\n",
|
288 |
+
" <tr>\n",
|
289 |
+
" <th>1</th>\n",
|
290 |
+
" <td>0704.0003</td>\n",
|
291 |
+
" <td>The evolution of the Earth-Moon system based o...</td>\n",
|
292 |
+
" <td>La evolución del sistema Tierra-Luna basado en...</td>\n",
|
293 |
+
" <td>La evolución del sistema Tierra-Luna es desc...</td>\n",
|
294 |
+
" <td>Introducción \\nLa teoría aceptada popularmente...</td>\n",
|
295 |
+
" </tr>\n",
|
296 |
+
" <tr>\n",
|
297 |
+
" <th>2</th>\n",
|
298 |
+
" <td>0704.0004</td>\n",
|
299 |
+
" <td>A determinant of Stirling cycle numbers counts...</td>\n",
|
300 |
+
" <td>Un determinante de los números de ciclo de Sti...</td>\n",
|
301 |
+
" <td>Demostramos que un determinante de los númer...</td>\n",
|
302 |
+
" <td>Introducción El propósito principal de este ar...</td>\n",
|
303 |
+
" </tr>\n",
|
304 |
+
" <tr>\n",
|
305 |
+
" <th>3</th>\n",
|
306 |
+
" <td>0704.0005</td>\n",
|
307 |
+
" <td>From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a...</td>\n",
|
308 |
+
" <td>DE DÍA A DÍA\\nWAEL ABU-SHAMMALA Y ALBERTO TORC...</td>\n",
|
309 |
+
" <td>En este artículo mostramos cómo calcular la ...</td>\n",
|
310 |
+
" <td>DE DÍA A DÍA\\nWAEL ABU-SHAMMALA Y ALBERTO TORC...</td>\n",
|
311 |
+
" </tr>\n",
|
312 |
+
" <tr>\n",
|
313 |
+
" <th>4</th>\n",
|
314 |
+
" <td>0704.0007</td>\n",
|
315 |
+
" <td>Polymer Quantum Mechanics and its Continuum Limit</td>\n",
|
316 |
+
" <td>La mecánica cuántica de polímeros y su límite ...</td>\n",
|
317 |
+
" <td>Una representación cuántica no estándar de l...</td>\n",
|
318 |
+
" <td>La mecánica cuántica de polímeros y su límite ...</td>\n",
|
319 |
+
" </tr>\n",
|
320 |
+
" <tr>\n",
|
321 |
+
" <th>...</th>\n",
|
322 |
+
" <td>...</td>\n",
|
323 |
+
" <td>...</td>\n",
|
324 |
+
" <td>...</td>\n",
|
325 |
+
" <td>...</td>\n",
|
326 |
+
" <td>...</td>\n",
|
327 |
+
" </tr>\n",
|
328 |
+
" <tr>\n",
|
329 |
+
" <th>1749</th>\n",
|
330 |
+
" <td>0704.1996</td>\n",
|
331 |
+
" <td>A Wave-function for Stringy Universes</td>\n",
|
332 |
+
" <td>LPTENS–07/16\\nAbril de 2007\\nUna función de on...</td>\n",
|
333 |
+
" <td>Definimos una función de onda para los fondo...</td>\n",
|
334 |
+
" <td>Introducción\\nNuestro objetivo en este documen...</td>\n",
|
335 |
+
" </tr>\n",
|
336 |
+
" <tr>\n",
|
337 |
+
" <th>1750</th>\n",
|
338 |
+
" <td>0704.1997</td>\n",
|
339 |
+
" <td>Query on Negative Temperature, Internal Intera...</td>\n",
|
340 |
+
" <td>Microsoft Word - negEntr.doc\\nConsulta sobre t...</td>\n",
|
341 |
+
" <td>Después de que la temperatura negativa se vu...</td>\n",
|
342 |
+
" <td>Microsoft Word - negEntr.doc\\nConsulta sobre t...</td>\n",
|
343 |
+
" </tr>\n",
|
344 |
+
" <tr>\n",
|
345 |
+
" <th>1751</th>\n",
|
346 |
+
" <td>0704.1998</td>\n",
|
347 |
+
" <td>Absence of the Fifth Force Problem in a Model ...</td>\n",
|
348 |
+
" <td>Ausencia del problema de la quinta fuerza en u...</td>\n",
|
349 |
+
" <td>Un modelo de escala invariante que contiene ...</td>\n",
|
350 |
+
" <td>Introducción\\n\\tBase de Dos Medidas Teoría de ...</td>\n",
|
351 |
+
" </tr>\n",
|
352 |
+
" <tr>\n",
|
353 |
+
" <th>1752</th>\n",
|
354 |
+
" <td>0704.1999</td>\n",
|
355 |
+
" <td>Dark matter caustics and the enhancement of se...</td>\n",
|
356 |
+
" <td>Proyecto de versión 16 de noviembre de 2018\\nT...</td>\n",
|
357 |
+
" <td>Los haloes fríos de materia oscura están pob...</td>\n",
|
358 |
+
" <td>Proyecto de versión 16 de noviembre de 2018\\nT...</td>\n",
|
359 |
+
" </tr>\n",
|
360 |
+
" <tr>\n",
|
361 |
+
" <th>1753</th>\n",
|
362 |
+
" <td>0704.2000</td>\n",
|
363 |
+
" <td>Search for a Higgs boson produced in associati...</td>\n",
|
364 |
+
" <td>FERMILAB-PUB-07/076-E\\nBúsqueda de un bosón Hi...</td>\n",
|
365 |
+
" <td>Describimos una búsqueda para el modelo está...</td>\n",
|
366 |
+
" <td>FERMILAB-PUB-07/076-E\\nBúsqueda de un bosón Hi...</td>\n",
|
367 |
+
" </tr>\n",
|
368 |
+
" </tbody>\n",
|
369 |
+
"</table>\n",
|
370 |
+
"<p>1754 rows × 5 columns</p>\n",
|
371 |
+
"</div>"
|
372 |
+
],
|
373 |
+
"text/plain": [
|
374 |
+
" id title \\\n",
|
375 |
+
"0 0704.0002 Sparsity-certifying Graph Decompositions \n",
|
376 |
+
"1 0704.0003 The evolution of the Earth-Moon system based o... \n",
|
377 |
+
"2 0704.0004 A determinant of Stirling cycle numbers counts... \n",
|
378 |
+
"3 0704.0005 From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a... \n",
|
379 |
+
"4 0704.0007 Polymer Quantum Mechanics and its Continuum Limit \n",
|
380 |
+
"... ... ... \n",
|
381 |
+
"1749 0704.1996 A Wave-function for Stringy Universes \n",
|
382 |
+
"1750 0704.1997 Query on Negative Temperature, Internal Intera... \n",
|
383 |
+
"1751 0704.1998 Absence of the Fifth Force Problem in a Model ... \n",
|
384 |
+
"1752 0704.1999 Dark matter caustics and the enhancement of se... \n",
|
385 |
+
"1753 0704.2000 Search for a Higgs boson produced in associati... \n",
|
386 |
+
"\n",
|
387 |
+
" full_text \\\n",
|
388 |
+
"0 Descomposiciones del gráfico de certificación ... \n",
|
389 |
+
"1 La evolución del sistema Tierra-Luna basado en... \n",
|
390 |
+
"2 Un determinante de los números de ciclo de Sti... \n",
|
391 |
+
"3 DE DÍA A DÍA\\nWAEL ABU-SHAMMALA Y ALBERTO TORC... \n",
|
392 |
+
"4 La mecánica cuántica de polímeros y su límite ... \n",
|
393 |
+
"... ... \n",
|
394 |
+
"1749 LPTENS–07/16\\nAbril de 2007\\nUna función de on... \n",
|
395 |
+
"1750 Microsoft Word - negEntr.doc\\nConsulta sobre t... \n",
|
396 |
+
"1751 Ausencia del problema de la quinta fuerza en u... \n",
|
397 |
+
"1752 Proyecto de versión 16 de noviembre de 2018\\nT... \n",
|
398 |
+
"1753 FERMILAB-PUB-07/076-E\\nBúsqueda de un bosón Hi... \n",
|
399 |
+
"\n",
|
400 |
+
" abstract \\\n",
|
401 |
+
"0 Describimos un nuevo algoritmo, el juego de ... \n",
|
402 |
+
"1 La evolución del sistema Tierra-Luna es desc... \n",
|
403 |
+
"2 Demostramos que un determinante de los númer... \n",
|
404 |
+
"3 En este artículo mostramos cómo calcular la ... \n",
|
405 |
+
"4 Una representación cuántica no estándar de l... \n",
|
406 |
+
"... ... \n",
|
407 |
+
"1749 Definimos una función de onda para los fondo... \n",
|
408 |
+
"1750 Después de que la temperatura negativa se vu... \n",
|
409 |
+
"1751 Un modelo de escala invariante que contiene ... \n",
|
410 |
+
"1752 Los haloes fríos de materia oscura están pob... \n",
|
411 |
+
"1753 Describimos una búsqueda para el modelo está... \n",
|
412 |
+
"\n",
|
413 |
+
" text_no_abstract \n",
|
414 |
+
"0 Introducción y preliminares\\nEl foco de este d... \n",
|
415 |
+
"1 Introducción \\nLa teoría aceptada popularmente... \n",
|
416 |
+
"2 Introducción El propósito principal de este ar... \n",
|
417 |
+
"3 DE DÍA A DÍA\\nWAEL ABU-SHAMMALA Y ALBERTO TORC... \n",
|
418 |
+
"4 La mecánica cuántica de polímeros y su límite ... \n",
|
419 |
+
"... ... \n",
|
420 |
+
"1749 Introducción\\nNuestro objetivo en este documen... \n",
|
421 |
+
"1750 Microsoft Word - negEntr.doc\\nConsulta sobre t... \n",
|
422 |
+
"1751 Introducción\\n\\tBase de Dos Medidas Teoría de ... \n",
|
423 |
+
"1752 Proyecto de versión 16 de noviembre de 2018\\nT... \n",
|
424 |
+
"1753 FERMILAB-PUB-07/076-E\\nBúsqueda de un bosón Hi... \n",
|
425 |
+
"\n",
|
426 |
+
"[1754 rows x 5 columns]"
|
427 |
+
]
|
428 |
+
},
|
429 |
+
"execution_count": 77,
|
430 |
+
"metadata": {},
|
431 |
+
"output_type": "execute_result"
|
432 |
+
}
|
433 |
+
],
|
434 |
+
"source": [
|
435 |
+
"es = df[['id','title','translated','translated_abstract','translated_no_abstract']]\n",
|
436 |
+
"es.columns = [\"id\",\"title\", \"full_text\",\"abstract\",\"text_no_abstract\"]\n",
|
437 |
+
"es.to_csv(CSV_PATH+'/scientific_paper_es.csv',index=False,encoding='utf-8')\n",
|
438 |
+
"es"
|
439 |
+
]
|
440 |
+
},
|
441 |
+
{
|
442 |
+
"cell_type": "code",
|
443 |
+
"execution_count": 79,
|
444 |
+
"metadata": {},
|
445 |
+
"outputs": [
|
446 |
+
{
|
447 |
+
"data": {
|
448 |
+
"text/html": [
|
449 |
+
"<div>\n",
|
450 |
+
"<style scoped>\n",
|
451 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
452 |
+
" vertical-align: middle;\n",
|
453 |
+
" }\n",
|
454 |
+
"\n",
|
455 |
+
" .dataframe tbody tr th {\n",
|
456 |
+
" vertical-align: top;\n",
|
457 |
+
" }\n",
|
458 |
+
"\n",
|
459 |
+
" .dataframe thead th {\n",
|
460 |
+
" text-align: right;\n",
|
461 |
+
" }\n",
|
462 |
+
"</style>\n",
|
463 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
464 |
+
" <thead>\n",
|
465 |
+
" <tr style=\"text-align: right;\">\n",
|
466 |
+
" <th></th>\n",
|
467 |
+
" <th>id</th>\n",
|
468 |
+
" <th>title</th>\n",
|
469 |
+
" <th>full_text</th>\n",
|
470 |
+
" <th>abstract</th>\n",
|
471 |
+
" <th>text_no_abstract</th>\n",
|
472 |
+
" </tr>\n",
|
473 |
+
" </thead>\n",
|
474 |
+
" <tbody>\n",
|
475 |
+
" <tr>\n",
|
476 |
+
" <th>0</th>\n",
|
477 |
+
" <td>0704.0002</td>\n",
|
478 |
+
" <td>Sparsity-certifying Graph Decompositions</td>\n",
|
479 |
+
" <td>Sparsity-certifying Graph Decompositions\\nIlea...</td>\n",
|
480 |
+
" <td>We describe a new algorithm, the $(k,\\ell)$-...</td>\n",
|
481 |
+
" <td>Introduction and preliminaries\\nThe focus of t...</td>\n",
|
482 |
+
" </tr>\n",
|
483 |
+
" <tr>\n",
|
484 |
+
" <th>1</th>\n",
|
485 |
+
" <td>0704.0003</td>\n",
|
486 |
+
" <td>The evolution of the Earth-Moon system based o...</td>\n",
|
487 |
+
" <td>The evolution of the Earth-Moon system based o...</td>\n",
|
488 |
+
" <td>The evolution of Earth-Moon system is descri...</td>\n",
|
489 |
+
" <td>Introduction \\nThe popularly accepted theory f...</td>\n",
|
490 |
+
" </tr>\n",
|
491 |
+
" <tr>\n",
|
492 |
+
" <th>2</th>\n",
|
493 |
+
" <td>0704.0004</td>\n",
|
494 |
+
" <td>A determinant of Stirling cycle numbers counts...</td>\n",
|
495 |
+
" <td>A Determinant of Stirling Cycle Numbers Counts...</td>\n",
|
496 |
+
" <td>We show that a determinant of Stirling cycle...</td>\n",
|
497 |
+
" <td>Introduction The chief purpose of this paper i...</td>\n",
|
498 |
+
" </tr>\n",
|
499 |
+
" <tr>\n",
|
500 |
+
" <th>3</th>\n",
|
501 |
+
" <td>0704.0005</td>\n",
|
502 |
+
" <td>From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a...</td>\n",
|
503 |
+
" <td>FROM DYADIC Λα TO Λα\\nWAEL ABU-SHAMMALA AND AL...</td>\n",
|
504 |
+
" <td>In this paper we show how to compute the $\\L...</td>\n",
|
505 |
+
" <td>FROM DYADIC Λα TO Λα\\nWAEL ABU-SHAMMALA AND AL...</td>\n",
|
506 |
+
" </tr>\n",
|
507 |
+
" <tr>\n",
|
508 |
+
" <th>4</th>\n",
|
509 |
+
" <td>0704.0007</td>\n",
|
510 |
+
" <td>Polymer Quantum Mechanics and its Continuum Limit</td>\n",
|
511 |
+
" <td>Polymer Quantum Mechanics and its Continuum Li...</td>\n",
|
512 |
+
" <td>A rather non-standard quantum representation...</td>\n",
|
513 |
+
" <td>Polymer Quantum Mechanics and its Continuum Li...</td>\n",
|
514 |
+
" </tr>\n",
|
515 |
+
" <tr>\n",
|
516 |
+
" <th>...</th>\n",
|
517 |
+
" <td>...</td>\n",
|
518 |
+
" <td>...</td>\n",
|
519 |
+
" <td>...</td>\n",
|
520 |
+
" <td>...</td>\n",
|
521 |
+
" <td>...</td>\n",
|
522 |
+
" </tr>\n",
|
523 |
+
" <tr>\n",
|
524 |
+
" <th>1749</th>\n",
|
525 |
+
" <td>0704.1996</td>\n",
|
526 |
+
" <td>A Wave-function for Stringy Universes</td>\n",
|
527 |
+
" <td>LPTENS–07/16\\nApril 2007\\nA Wave-function for ...</td>\n",
|
528 |
+
" <td>We define a wave-function for string theory ...</td>\n",
|
529 |
+
" <td>Introduction\\nOur goal in this paper is to emb...</td>\n",
|
530 |
+
" </tr>\n",
|
531 |
+
" <tr>\n",
|
532 |
+
" <th>1750</th>\n",
|
533 |
+
" <td>0704.1997</td>\n",
|
534 |
+
" <td>Query on Negative Temperature, Internal Intera...</td>\n",
|
535 |
+
" <td>Microsoft Word - negEntr.doc\\nQuery on Negativ...</td>\n",
|
536 |
+
" <td>After negative temperature is restated, we f...</td>\n",
|
537 |
+
" <td>Microsoft Word - negEntr.doc\\nQuery on Negativ...</td>\n",
|
538 |
+
" </tr>\n",
|
539 |
+
" <tr>\n",
|
540 |
+
" <th>1751</th>\n",
|
541 |
+
" <td>0704.1998</td>\n",
|
542 |
+
" <td>Absence of the Fifth Force Problem in a Model ...</td>\n",
|
543 |
+
" <td>Absence of the Fifth Force Problem in a Model ...</td>\n",
|
544 |
+
" <td>A scale invariant model containing dilaton $...</td>\n",
|
545 |
+
" <td>Introduction\\n\\tBasis of Two Measures Field Th...</td>\n",
|
546 |
+
" </tr>\n",
|
547 |
+
" <tr>\n",
|
548 |
+
" <th>1752</th>\n",
|
549 |
+
" <td>0704.1999</td>\n",
|
550 |
+
" <td>Dark matter caustics and the enhancement of se...</td>\n",
|
551 |
+
" <td>Draft version November 16, 2018\\nPreprint type...</td>\n",
|
552 |
+
" <td>Cold dark matter haloes are populated by cau...</td>\n",
|
553 |
+
" <td>Draft version November 16, 2018\\nPreprint type...</td>\n",
|
554 |
+
" </tr>\n",
|
555 |
+
" <tr>\n",
|
556 |
+
" <th>1753</th>\n",
|
557 |
+
" <td>0704.2000</td>\n",
|
558 |
+
" <td>Search for a Higgs boson produced in associati...</td>\n",
|
559 |
+
" <td>FERMILAB-PUB-07/076-E\\nSearch for a Higgs boso...</td>\n",
|
560 |
+
" <td>We describe a search for the standard model ...</td>\n",
|
561 |
+
" <td>FERMILAB-PUB-07/076-E\\nSearch for a Higgs boso...</td>\n",
|
562 |
+
" </tr>\n",
|
563 |
+
" </tbody>\n",
|
564 |
+
"</table>\n",
|
565 |
+
"<p>1754 rows × 5 columns</p>\n",
|
566 |
+
"</div>"
|
567 |
+
],
|
568 |
+
"text/plain": [
|
569 |
+
" id title \\\n",
|
570 |
+
"0 0704.0002 Sparsity-certifying Graph Decompositions \n",
|
571 |
+
"1 0704.0003 The evolution of the Earth-Moon system based o... \n",
|
572 |
+
"2 0704.0004 A determinant of Stirling cycle numbers counts... \n",
|
573 |
+
"3 0704.0005 From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a... \n",
|
574 |
+
"4 0704.0007 Polymer Quantum Mechanics and its Continuum Limit \n",
|
575 |
+
"... ... ... \n",
|
576 |
+
"1749 0704.1996 A Wave-function for Stringy Universes \n",
|
577 |
+
"1750 0704.1997 Query on Negative Temperature, Internal Intera... \n",
|
578 |
+
"1751 0704.1998 Absence of the Fifth Force Problem in a Model ... \n",
|
579 |
+
"1752 0704.1999 Dark matter caustics and the enhancement of se... \n",
|
580 |
+
"1753 0704.2000 Search for a Higgs boson produced in associati... \n",
|
581 |
+
"\n",
|
582 |
+
" full_text \\\n",
|
583 |
+
"0 Sparsity-certifying Graph Decompositions\\nIlea... \n",
|
584 |
+
"1 The evolution of the Earth-Moon system based o... \n",
|
585 |
+
"2 A Determinant of Stirling Cycle Numbers Counts... \n",
|
586 |
+
"3 FROM DYADIC Λα TO Λα\\nWAEL ABU-SHAMMALA AND AL... \n",
|
587 |
+
"4 Polymer Quantum Mechanics and its Continuum Li... \n",
|
588 |
+
"... ... \n",
|
589 |
+
"1749 LPTENS–07/16\\nApril 2007\\nA Wave-function for ... \n",
|
590 |
+
"1750 Microsoft Word - negEntr.doc\\nQuery on Negativ... \n",
|
591 |
+
"1751 Absence of the Fifth Force Problem in a Model ... \n",
|
592 |
+
"1752 Draft version November 16, 2018\\nPreprint type... \n",
|
593 |
+
"1753 FERMILAB-PUB-07/076-E\\nSearch for a Higgs boso... \n",
|
594 |
+
"\n",
|
595 |
+
" abstract \\\n",
|
596 |
+
"0 We describe a new algorithm, the $(k,\\ell)$-... \n",
|
597 |
+
"1 The evolution of Earth-Moon system is descri... \n",
|
598 |
+
"2 We show that a determinant of Stirling cycle... \n",
|
599 |
+
"3 In this paper we show how to compute the $\\L... \n",
|
600 |
+
"4 A rather non-standard quantum representation... \n",
|
601 |
+
"... ... \n",
|
602 |
+
"1749 We define a wave-function for string theory ... \n",
|
603 |
+
"1750 After negative temperature is restated, we f... \n",
|
604 |
+
"1751 A scale invariant model containing dilaton $... \n",
|
605 |
+
"1752 Cold dark matter haloes are populated by cau... \n",
|
606 |
+
"1753 We describe a search for the standard model ... \n",
|
607 |
+
"\n",
|
608 |
+
" text_no_abstract \n",
|
609 |
+
"0 Introduction and preliminaries\\nThe focus of t... \n",
|
610 |
+
"1 Introduction \\nThe popularly accepted theory f... \n",
|
611 |
+
"2 Introduction The chief purpose of this paper i... \n",
|
612 |
+
"3 FROM DYADIC Λα TO Λα\\nWAEL ABU-SHAMMALA AND AL... \n",
|
613 |
+
"4 Polymer Quantum Mechanics and its Continuum Li... \n",
|
614 |
+
"... ... \n",
|
615 |
+
"1749 Introduction\\nOur goal in this paper is to emb... \n",
|
616 |
+
"1750 Microsoft Word - negEntr.doc\\nQuery on Negativ... \n",
|
617 |
+
"1751 Introduction\\n\\tBasis of Two Measures Field Th... \n",
|
618 |
+
"1752 Draft version November 16, 2018\\nPreprint type... \n",
|
619 |
+
"1753 FERMILAB-PUB-07/076-E\\nSearch for a Higgs boso... \n",
|
620 |
+
"\n",
|
621 |
+
"[1754 rows x 5 columns]"
|
622 |
+
]
|
623 |
+
},
|
624 |
+
"execution_count": 79,
|
625 |
+
"metadata": {},
|
626 |
+
"output_type": "execute_result"
|
627 |
+
}
|
628 |
+
],
|
629 |
+
"source": [
|
630 |
+
"en = df[['id','title','full_text','abstract','text_no_abstract']]\n",
|
631 |
+
"en.columns = [\"id\",\"title\", \"full_text\",\"abstract\",\"text_no_abstract\"]\n",
|
632 |
+
"en.to_csv(CSV_PATH+'/scientific_paper_en.csv',index=False,encoding='utf-8')\n",
|
633 |
+
"en"
|
634 |
+
]
|
635 |
+
}
|
636 |
+
],
|
637 |
+
"metadata": {
|
638 |
+
"interpreter": {
|
639 |
+
"hash": "05def4d9d0834781cbeb6b95fd92421f8bd6a45e945308f90d88567f4afc1911"
|
640 |
+
},
|
641 |
+
"kernelspec": {
|
642 |
+
"display_name": "Python 3.8.12 ('tensorflow')",
|
643 |
+
"language": "python",
|
644 |
+
"name": "python3"
|
645 |
+
},
|
646 |
+
"language_info": {
|
647 |
+
"codemirror_mode": {
|
648 |
+
"name": "ipython",
|
649 |
+
"version": 3
|
650 |
+
},
|
651 |
+
"file_extension": ".py",
|
652 |
+
"mimetype": "text/x-python",
|
653 |
+
"name": "python",
|
654 |
+
"nbconvert_exporter": "python",
|
655 |
+
"pygments_lexer": "ipython3",
|
656 |
+
"version": "3.9.7"
|
657 |
+
},
|
658 |
+
"orig_nbformat": 4
|
659 |
+
},
|
660 |
+
"nbformat": 4,
|
661 |
+
"nbformat_minor": 2
|
662 |
+
}
|