Spaces:
Running
Running
Delete lookup.py
Browse files
lookup.py
DELETED
@@ -1,746 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python
|
2 |
-
#coding=utf-8
|
3 |
-
"""
|
4 |
-
arXivToBibTeX / arXivToWiki v7.2
|
5 |
-
©2009-2020 Sven-S. Porst / earthlingsoft <ssp-web@earthlingsoft.net>
|
6 |
-
|
7 |
-
Service available at: https://arxiv2bibtex.org
|
8 |
-
Source code available at: https://github.com/ssp/arXivToBibTeX
|
9 |
-
|
10 |
-
Originally created for Courant Research Centre
|
11 |
-
‘Higher Order Structures in Mathematics’ at the
|
12 |
-
Mathematics Institute at the University of Göttingen.
|
13 |
-
|
14 |
-
Links for form submission refer to the folder of the current path without a
|
15 |
-
further filename:
|
16 |
-
/?q=searchTerm
|
17 |
-
|
18 |
-
Your server setup (.htaccess file) needs to make sure that these requests are
|
19 |
-
redirected to the script.
|
20 |
-
"""
|
21 |
-
|
22 |
-
# import cgi
|
23 |
-
import re
|
24 |
-
import urllib
|
25 |
-
from urlparse import urlparse
|
26 |
-
from xml.etree import ElementTree
|
27 |
-
import xml.etree
|
28 |
-
import os
|
29 |
-
import sys
|
30 |
-
reload(sys)
|
31 |
-
sys.setdefaultencoding("utf-8")
|
32 |
-
|
33 |
-
#for debugging
|
34 |
-
#import cgitb
|
35 |
-
#cgitb.enable()
|
36 |
-
|
37 |
-
# Maximum number of results requested from the arXiv API per query.
maxpapers = 100

# Trailing version suffix such as "v4" (the digits are optional).
trailingRE = re.compile(r"(.*)v[0-9]*$")
# New-style ID: four digits, optional dot, four or more digits.
newStyleRE = re.compile(r"\d{4}\.?\d{4,}$")
# Bare seven-digit number (old-style ID without its archive prefix).
sevenDigitsRE = re.compile(r"\d{7}$")
# Old-style ID with archive prefix, e.g. math/0606123.
oldStyleIDRE = re.compile(r"[a-z-]+/\d{7}$")
# Either ID style, searched for anywhere inside a URL path.
paperIDRE = re.compile(r"([a-z-]+/\d{7}|\d{4}\.\d{4,})")


def prepareArXivID(ID):
    """
    Normalise a user-supplied arXiv paper ID.

    First, surrounding whitespace and a trailing version number like v4
    are stripped. Then:
        0909.1234 or 1504.12345-style ID => returned unchanged
        09091234 or 150412345-style ID => 0909.1234 or 1504.12345
        0606123-style ID => math/0606123
        non-math/0606123-style ID => returned unchanged
        anything else => None
    """
    myID = trailingRE.sub(r"\1", ID.strip())

    if newStyleRE.match(myID) is not None:
        # An 8+ digit number (new-style): insert the dot after the first
        # four digits, but only when the ID does not contain one already.
        if "." not in myID:
            myID = myID[:4] + "." + myID[4:]
        return myID

    if sevenDigitsRE.match(myID) is not None:
        # Just seven digits: assume the math archive.
        return "math/" + myID

    if oldStyleIDRE.match(myID) is not None:
        return myID

    return None
|
71 |
-
|
72 |
-
|
73 |
-
def extractPapersFromArXivUriPath(path):
    """
    An arXiv URL was entered: extract the paper ID from its path.

    Input: path - the path component of an arXiv URL.
    Output: the first substring of the path matched by paperIDRE
        (an old-style archive/1234567 ID or a new-style 1705.12345 ID),
        or None when the path contains no such ID.
    """
    paperIDMatch = paperIDRE.search(path)
    if paperIDMatch is None:
        return None
    return paperIDMatch.group(1)
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
def printAll(output):
    """
    Write output to standard output regardless of the output format.
    """
    print(output)
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
def printHtml(HTML, output_format):
    """
    Write HTML to standard output, but only when the output format is "html".
    """
    if output_format != "html":
        return
    print(HTML)
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
def printPublicationsRaw(publications, format, outputformat):
    """
    Print the publications as plain text in the requested format.

    Nothing is printed unless outputformat is "raw" and format is one of
    "html", "bibtex", "biblatex", "bibitem" or "wiki".
    """
    if outputformat != "raw":
        return

    if format == "html":
        separator = "\n\n"
        rendered = [basicMarkupForHTMLEditing(entry) for entry in publications]
    elif format in ("bibtex", "biblatex"):
        separator = "\n\n"
        rendered = [markupForBibTeXItem(entry, format) for entry in publications]
    elif format == "bibitem":
        separator = "\n\n"
        rendered = [markupForBibItem(entry) for entry in publications]
    elif format == "wiki":
        # Wiki items are separated by a single newline only.
        separator = "\n"
        rendered = [markupForWikiItem(entry) for entry in publications]
    else:
        return

    print(separator.join(rendered))
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
def escapeHTML(inputString):
    """
    Input: string
    Output: input string with < > & " replaced by their HTML character entities
    """
    # Implemented with plain replacements: the cgi module is not imported in
    # this file, and cgi.escape does not exist in current Python versions.
    # The ampersand must be replaced first so the entities produced by the
    # later replacements are not escaped a second time.
    escaped = inputString.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;")
    escaped = escaped.replace(">", "&gt;")
    escaped = escaped.replace('"', "&quot;")
    return escaped
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
def theForm(format, queryString):
    """
    Build the HTML for the search form.

    The text field is pre-filled with queryString and the hidden format
    field with format; both values are passed through escapeHTML.
    """
    pieces = [
        '''
<form method="get" action="./">
<p>
<input type="text" name="q" class="q" autofocus="autofocus" placeholder="1510.01797 or courant_r_1" value="''',
        escapeHTML(queryString),
        '''"/>
<input type="hidden" name="format" id="formatinput" value="''',
        escapeHTML(format),
        '''"/>
<input type="submit" class="button" value="Retrieve Information"/>
</p>
</form>
''',
    ]
    return "".join(pieces)
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
def outputformatToMimeType(outputformat):
    """
    Map an output format name to the MIME type used in the Content-type header.

    "html" gives text/html, "bibtex" and "biblatex" give
    application/x-bibtex, and everything else gives text/plain.
    """
    if outputformat == "html":
        return "text/html"
    if outputformat in ("bibtex", "biblatex"):
        return "application/x-bibtex"
    return "text/plain"
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
def pageHead(queryString, format, outputformat):
    """
    Build the HTTP header and, for HTML output, the top of the page.

    Input: queryString - current query, used to pre-fill the search form.
        format - name of the format shown initially; it is inserted
            verbatim into the onload JavaScript call.
        outputformat - "raw" yields only a Content-type header line; any
            other value yields the header followed by the HTML head
            (CSS and JavaScript), the page title and the search form.
    Output: string.

    NOTE(review): in the raw branch the MIME type is looked up from
    outputformat, which is always "raw" there - confirm whether format
    was intended instead.
    """
    if outputformat == "raw":
        return "Content-type: " + outputformatToMimeType(outputformat) + "; charset=UTF-8\n"

    # The page title depends on the URI the script is reached through.
    if isRunningFromBibTeXURI():
        title = "arXiv To BibTeX"
    elif isRunningFromHTMLURI():
        title = "arXiv to HTML"
    else:
        title = "arXiv To Wiki"

    beforeTitle = """Content-type: text/html; charset=UTF-8

<!DOCTYPE html>
<html lang="en">
<head>
<title>"""

    afterTitle = """</title>
<meta name='generator' content='arXiv to Wiki/BibTeX Converter, 2009-2015 by Sven-S. Porst (ssp-web@earthlingsoft.net).'/>
<meta name='description' content='Create BibTeX, HTML or Wiki markup for papers on the mathematics and physics preprint arXiv.'/>
<style>
* { margin: 0em; padding: 0em; }
body { width: 40em; font-family: Georgia, Times, serif; line-height: 141%; margin:auto; background: #eee;}
.clear { clear:both; }
#title { text-align:center; margin:3em 1em; }
p { margin: 0.5em 0em; }
a { text-decoration: none; color: #00d; }
a:hover { text-decoration: underline; color: #00f; }
a:visited { color: #606; }
a.editlink { color: #b00;}
h1 { font-size: 144%; margin: 0.5em;}
a h1 { color: #000; }
form { display:block; margin: 1em; }
form p { text-align:center; }
form input { font-size: 121%; }
form input.q { width: 60%; margin-bottom: 1em; }
form input.button { position:relative; bottom: 3px; }
h2 { font-size: 121%; margin:2em 0em 1em 0em; position:relative; }
h2:before { content: "\\002767"; position: absolute; width: 1em; left:-1em; font-size: 360%; color: #999; }
h2.error:before { content: "\\002718"; color: #f33; }
ul { padding-left: 2em; }
ul li { margin-bottom: 0.5em; }
.formatpicker { text-align: right; margin:1em 0em -1em 0em; }
.formatpicker ul { display: inline; list-style-type: none; padding: 0px; }
.formatpicker ul li { display: inline; margin-left: 0.5em; font-weight: normal; padding: 0em; }
.format { display: none; }
textarea { width:100%; }
.warning { font-style:italic; text-align:center; margin: 1em 0em; color: #900;}
#foot { font-size: 80%; font-style:italic; text-align: center; margin: 3em 0em 1em 0em; padding-top: 0.2em; border-top: #999 solid 1px; }
</style>
<script>
//<![CDATA[
function showType(type) {
var myTypes = ["bibtex", "biblatex", "bibitem", "html", "wiki"];
var myType = (!type || myTypes.indexOf(type) === -1) ? "wiki" : type;
document.getElementById("formatinput").value = myType;
for (var i = 0; i < myTypes.length; i++) {
var name = myTypes[i]
var linkID = name.concat("-link");
if (name === myType) {
document.getElementById(name).style.display = "block";
document.getElementById(linkID).style.fontWeight = "bold";
}
else {
document.getElementById(name).style.display = "none";
document.getElementById(linkID).style.fontWeight = "normal";
}
}
}
//]]>
</script>
</head>
<body onload="javascript:showType('"""

    afterFormat = """');">
<div id="page">
<div id="title">
<h1><a href="./">Retrieve arXiv Information</a></h1>
</div>
"""

    return beforeTitle + title + afterTitle + format + afterFormat + theForm(format, queryString)
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
def extraInfo():
    """
    Build the HTML that explains what can be entered into the form:
    paper IDs, an arXiv author ID or an ORCID ID.
    """
    lines = [
        "",
        "<p>",
        "Use the form above to get information for <a href=\"https://www.arxiv.org/\">arXiv</a> submissions",
        "for use in BibTeX, on web pages or in Wikis. You can enter:",
        "</p>",
        "<ul>",
        "<li>",
        "<p>",
        "one or several <em>paper IDs</em> like “1510.01797” or “math/0506203”.",
        "</p>",
        "</li><li>",
        "<p>",
        "your <a href=\"https://arxiv.org/help/author_identifiers\">arXiv <em>author ID</em></a>",
        "looking similar to “grafvbothmer_h_1” to get a list of all your submitted papers.",
        "</p>",
        "</li>",
        "<li>",
        "<p>",
        "your <a href=\"https://orcid.org\">ORCID ID</a> looking similar to “0000-0003-0136-444X”",
        "which you should register with your arXiv-account.",
        "</p>",
        "</li>",
        "</ul>",
        "",
    ]
    return "\n".join(lines)
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
def pageFoot():
    """
    Build the HTML for the bottom of the page: the footer with its links,
    followed by the closing page div, body and html tags.
    """
    lines = [
        "<div id=\"foot\">",
        "Data from <a href=\"https://arxiv.org/help/api/index\">arXiv API</a>",
        "· Site by <a href=\"https://earthlingsoft.net/ssp\">Sven-S. Porst</a>",
        "· <a href=\"https://github.com/ssp/arXivToWiki/issues\">Feedback</a>",
        "</div>",
        "</div>",
        "</body>",
        "</html>",
        "",
    ]
    return "\n".join(lines)
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
def htmlMarkup(items, type):
    """
    Input: items - List of publication dictionaries.
        type - "Preprint" or "Published".
    Output: List of strings forming a textarea that contains the
        HTML-escaped list markup for the items; empty list for no items.
    """
    if len(items) == 0:
        return []

    listLines = ["<ul>\n"]
    for entry in items:
        # Escaped because the markup is shown as editable text in a textarea.
        listLines.extend(["<li>\n", escapeHTML(basicMarkupForHTMLEditing(entry)), "\n</li>"])
    listLines.append("\n</ul>")

    # Published items get one more textarea row each than preprints.
    rowsPerItem = 5 if type == "Published" else 4
    opening = ["<textarea class='htmlinfo' cols='70' rows='", str(rowsPerItem * len(items) + 2), "'>\n"]
    return opening + listLines + ["</textarea>\n"]
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
def basicMarkupForHTMLEditing(myDict):
    """
    Input: myDict - dictionary with publication data; the keys "authors"
        (list of strings), "title", "year", "journal" (string or None),
        "link", "ID" and "DOI" (list of strings or None) are read.
    Output: String with HTML markup for publication data.

    The values are inserted as they are, without HTML escaping.
    """
    output = [", ".join(myDict["authors"]), ': “', myDict["title"], '”, ', myDict["year"]]
    if myDict["journal"] is not None:
        output += [", ", myDict["journal"]]
    output += ["; <a href='", myDict["link"], "'>arXiv:", myDict["ID"], "</a>."]
    if myDict["DOI"]:
        dois = ["<a href='https://dx.doi.org/" + DOI + "'>" + DOI + "</a>" for DOI in myDict["DOI"]]
        output += [" DOI: ", ", ".join(dois), "."]

    return "".join(output)
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
def wikiMarkup(items, type):
    """
    Input: items - List of publication dictionaries.
        type - "Preprint" or "Published".
    Output: List of strings containing an HTML preview list followed by a
        textarea full of bibliographic information in Wiki markup;
        empty list for no items.
    """
    if len(items) == 0:
        return []

    wikiLines = []
    previewItems = []
    for item in items:
        wikiLines += [markupForWikiItem(item), "\n\n"]
        previewItems += [basicMarkupForHTMLEditing(item)]

    # No blank line after the last item.
    wikiLines[-1] = wikiLines[-1].strip("\n")

    # Published items get one more textarea row each than preprints.
    factor = 4 if type == "Published" else 3
    opening = [
        "<p>Preview:</p>\n",
        "<ul><li>",
        "\n</li><li>".join(previewItems),
        "</li></ul>\n",
        "<p class='clear'>For copy and pasting to a Wiki:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='",
        str(factor * len(items)),
        "'>\n",
    ]
    return opening + wikiLines + ["</textarea>\n"]
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
def markupForWikiItem(myDict):
    """
    Input: dictionary with publication data; the keys "authors", "title",
        "year", "journal", "link", "ID" and "DOI" are read.
    Output: Wiki markup for publication data as a single line: every run
        of whitespace is collapsed to one space.
    """
    wikioutput = ["* ", ", ".join(myDict["authors"]), ': “', myDict["title"], '”, ', myDict["year"]]
    if myDict["journal"] is not None:
        wikioutput += [", ", myDict["journal"]]
    wikioutput += ["; [", myDict["link"], " arXiv:", myDict["ID"], "]."]
    if myDict["DOI"]:
        dois = ["[https://dx.doi.org/" + DOI + " " + DOI + "]" for DOI in myDict["DOI"]]
        wikioutput += [" DOI: ", ", ".join(dois), "."]

    return re.sub(r"\s+", r" ", "".join(wikioutput))
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
def bibTeXMarkup(items, format):
    """
    Input: items - List of publication dictionaries.
        format - "bibtex" or "biblatex", passed on to markupForBibTeXItem.
    Output: List of strings forming a textarea full of BibTeX records;
        empty list for no items.
    """
    if len(items) == 0:
        return []

    records = [markupForBibTeXItem(item, format) for item in items]
    # One row per record line plus one blank row between adjacent records.
    linecount = sum(len(record.split('\n')) for record in records)
    return [
        "<textarea class='wikiinfo' cols='70' rows='",
        str(linecount + len(items) - 1),
        "'>\n",
        "\n\n".join(records),
        "</textarea>\n",
    ]
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
def markupForBibTeXItem(myDict, format):
    """
    Input: myDict - dictionary with publication data; the keys "ID",
        "authors", "title", "year", "journal" and "DOI" are read.
        format - "biblatex" selects the BibLaTeX flavour; any other
        value produces plain BibTeX.
    Output: BibTeX record for the paper.
    """
    bibTeXID = myDict["ID"]
    bibTeXAuthors = " and ".join(myDict["authors"])
    bibTeXTitle = myDict["title"]
    bibTeXYear = myDict["year"]

    isBibLaTeX = format == "biblatex"
    hasDOI = bool(myDict["DOI"])
    hasJournal = myDict["journal"] is not None

    # A paper with a journal reference or a DOI counts as published.
    if hasJournal or hasDOI:
        publicationType = "@article"
    elif isBibLaTeX:
        publicationType = "@online"
    else:
        publicationType = "@misc"

    # BibLaTeX carries the archive name in a separate Eprinttype field.
    eprintPrefix = "" if isBibLaTeX else "arXiv:"
    bibTeXEntry = [publicationType, "{", bibTeXID, ",\nAuthor = {", bibTeXAuthors, "},\nTitle = {", bibTeXTitle, "},\nYear = {", bibTeXYear, "},\nEprint = {", eprintPrefix, bibTeXID, "},\n"]
    if isBibLaTeX:
        bibTeXEntry += ["Eprinttype = {arXiv},\n"]
    if hasJournal:
        bibTeXEntry += ["Howpublished = {", myDict["journal"], "},\n"]
    if hasDOI:
        bibTeXEntry += ["Doi = {", " ".join(myDict["DOI"]), "},\n"]
    bibTeXEntry += ["}"]
    return "".join(bibTeXEntry)
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
def bibItemMarkup(items):
    r"""
    Input: List of publication dictionaries.
    Output: List of strings containing HTML markup with a short heading
        and a textarea full of \bibitem commands wrapped in a
        thebibliography environment; empty list for no items.
    """
    if len(items) == 0:
        return []

    bibItems = [markupForBibItem(item) for item in items]
    linecount = sum(len(bibItem.split('\n')) for bibItem in bibItems)
    return [
        "<p>Simple-minded \\bibitems:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='",
        # Three extra rows for the \begin line, the blank line and the \end line.
        str(linecount + 3),
        "'>\\begin{thebibliography}\n\n",
        "\n".join(bibItems),
        "\n\\end{thebibliography}</textarea>\n",
    ]
|
438 |
-
|
439 |
-
|
440 |
-
def markupForBibItem(myDict):
    r"""
    Input: dictionary with publication data; the keys "ID", "authors",
        "title", "year", "journal" and "DOI" are read.
    Output: LaTeX \bibitem command for the publication, ending in a newline.
    """
    bibTeXID = myDict["ID"]
    authors = myDict["authors"]

    # "A", "A and B", "A, B and C"; empty string when there are no authors.
    if len(authors) > 1:
        authorString = ", ".join(authors[:-1]) + " and " + authors[-1]
    elif len(authors) == 1:
        authorString = authors[0]
    else:
        authorString = ""

    bibItemCommand = ["\\bibitem{", bibTeXID, "}\n", authorString, ".\n\\newblock ", myDict["title"], ", ", myDict["year"]]
    if myDict["journal"] is not None:
        bibItemCommand += [",\n\\newblock ", myDict["journal"]]
    bibItemCommand += [";\n\\newblock arXiv:", bibTeXID, "."]
    if myDict["DOI"]:
        bibItemCommand += ["\n\\newblock DOI: ", " ".join(myDict["DOI"]), "."]
    return "".join(bibItemCommand) + "\n"
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
def errorMarkup(errorText):
    """
    Build the HTML for an error: a "No results" heading, the given error
    text in a paragraph of its own (inserted as is), and a paragraph
    linking to the issue tracker.
    """
    heading = """<h2 class="error">No results</h2>
<p>"""
    closing = """</p>
<p>If you think you entered a valid arXiv ID and you keep getting this error message, please accept our apologies and <a href="https://github.com/ssp/arXivToWiki/issues">let me know</a>.</p>
"""
    return "".join([heading, errorText, closing])
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
def isRunningFromBibTeXURI():
    """
    Return True when the request URI or host name contains "bibtex".
    """
    runningFromBibTeX = isInRequestURI("bibtex")
    return runningFromBibTeX
|
484 |
-
|
485 |
-
def isRunningFromHTMLURI():
    """
    Return True when the request URI or host name contains "html".
    """
    runningFromHTML = isInRequestURI("html")
    return runningFromHTML
|
487 |
-
|
488 |
-
def isInRequestURI(string):
    """
    Return True when string occurs in the REQUEST_URI or, failing that,
    in the HTTP_HOST environment variable.
    """
    if isInEnvironment("REQUEST_URI", string):
        return True
    return isInEnvironment("HTTP_HOST", string)
|
490 |
-
|
491 |
-
def isInEnvironment(fieldName, string):
    """
    Return True when the environment variable fieldName exists and its
    lower-cased value contains string; False otherwise.

    Only the environment value is lower-cased, so string has to be given
    in lower case to match.
    """
    return fieldName in os.environ and string in os.environ[fieldName].lower()
|
496 |
-
|
497 |
-
|
498 |
-
# Strips a non-digit prefix and the dot from a paper ID, keeping only its digits.
IDCleanerRE = re.compile(r"[^0-9]*([0-9]*)\.?([0-9]*)")


def _compareValues(first, second):
    """Three-way comparison returning -1, 0 or 1 (stand-in for the Python 2 cmp builtin)."""
    return (first > second) - (first < second)


def _numericPaperID(paperID):
    """Return the digits of a paper ID as an integer; 0 when it contains no digits."""
    digits = IDCleanerRE.sub(r"\1\2", paperID)
    return int(digits) if digits else 0


def comparePaperDictionaries (firstPaper, secondPaper):
    """
    Compare paper dictionaries.
    Earlier years are smaller.
    Smaller IDs within a year are smaller.

    Output: -1, 0 or 1. Papers are treated as equal (0) when either
        dictionary lacks the "year" or "ID" key.
    """
    for paper in (firstPaper, secondPaper):
        if "year" not in paper or "ID" not in paper:
            return 0

    comparisonResult = _compareValues(firstPaper["year"], secondPaper["year"])
    if comparisonResult == 0:
        # Same year: fall back to the numeric value of the IDs.
        comparisonResult = _compareValues(_numericPaperID(firstPaper["ID"]), _numericPaperID(secondPaper["ID"]))
    return comparisonResult
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
# XML namespace prefixes used in the arXiv Atom feeds.
_ATOM_NS = "{http://www.w3.org/2005/Atom}"
_ARXIV_NS = "{http://arxiv.org/schemas/atom}"

_JOURNAL_REF_NOTE = """<p><em>Please <a class="editlink" href="https://arxiv.org/user/" title="Go to arXiv user page where you can edit the information stored for your papers.">add the journal reference and <abbr title="Document Object Identifier">DOI</abbr> for your papers as soon as they are published</a>.</em></p>"""


def _publicationFromEntry(paper):
    """Turn one Atom entry element into a publication dictionary; None when the entry has no title."""
    titleElement = paper.find(_ATOM_NS + "title")
    if titleElement is None:
        return None
    theTitle = re.sub(r"\s*\n\s*", r" ", titleElement.text)
    theAuthors = [author.find(_ATOM_NS + "name").text for author in paper.iter(_ATOM_NS + "author")]
    theAbstract = paper.find(_ATOM_NS + "summary").text.strip()

    thePDF = ""
    theLink = ""
    for link in paper.iter(_ATOM_NS + "link"):
        attributes = link.attrib
        if "href" in attributes:
            linktype = attributes.get("type")
            if linktype == "application/pdf":
                thePDF = attributes["href"]
            elif linktype == "text/html":
                theLink = attributes["href"]

    # The ID is whatever follows /abs/ without its version suffix. Only a
    # trailing "v<digits>" is removed: splitting at the first "v" would
    # truncate IDs whose archive name contains that letter (solv-int/...).
    splitLink = theLink.split("/abs/")
    theID = re.sub(r"v[0-9]+$", "", splitLink[-1])
    theLink = splitLink[0] + "/abs/" + theID

    theYear = paper.find(_ATOM_NS + "published").text.split('-')[0]
    theDOIs = [DOI.text for DOI in paper.iter(_ARXIV_NS + "doi")]
    journal = paper.find(_ARXIV_NS + "journal_ref")
    theJournal = journal.text if journal is not None else None

    return {
        "ID": theID,
        "authors": theAuthors,
        "title": theTitle,
        "abstract": theAbstract,
        "year": theYear,
        "PDF": thePDF,
        "link": theLink,
        "DOI": theDOIs,
        "journal": theJournal,
    }


def _formatSection(divID, preprints, published, render, publishedNote=None):
    """Build the div for one output format: preprints first, then published papers."""
    section = ["<div id='" + divID + "'>\n"]
    if len(preprints) > 0:
        section += ["<h2>Preprints:</h2>\n", _JOURNAL_REF_NOTE]
        section += render(preprints, "Preprint")
    if len(published) > 0:
        section += ["<h2>Published:</h2>\n"]
        if publishedNote is not None:
            section += [publishedNote]
        section += render(published, "Published")
    section += ["</div>\n"]
    return section


def _mathSciNetNote(recordName):
    """Build the hint shown above published BibTeX/BibLaTeX records."""
    return "<p>These " + recordName + """ records are based on arXiv information only. You may prefer getting the more detailed records provided by <a href="https://mathscinet.ams.org/mathscinet/">MathSciNet</a> instead.</p>\n"""


def processCgi(form):
    """
    Handle one request: read the query from the form, fetch the matching
    records from the arXiv and print the complete response.

    Input: form - mapping of request fields; each field is read through
        its .value attribute. The fields used are:
            q - paper IDs separated by whitespace or commas, or a single
                arXiv author ID, ORCID ID or arxiv.org URL.
            outputformat - "html" (default) or "raw".
            format - "wiki", "bibtex", "biblatex", "bibitem" or "html".
    Output: None; everything is written to standard output.
    """
    from functools import cmp_to_key

    queryString = ""
    papers = []
    personID = ""
    hasQuery = "q" in form
    if hasQuery:
        queryString = form["q"].value
        papers = list(set(re.sub(r",", r" ", queryString).split()))
        # For a single entry matching a regex we have an arXiv or ORCID
        # author ID, see https://arxiv.org/help/author_identifiers
        if len(papers) == 1:
            arxivAuthorIDRegex = r"([a-z]*_[a-z]_[0-9]*)"
            orcidIDRegex = r"((https://orcid.org/)?\d\d\d\d-\d\d\d\d-\d\d\d\d-\d\d\d[0-9X])"
            authorMatch = re.search(arxivAuthorIDRegex + "|" + orcidIDRegex, papers[0])
            if authorMatch is not None:
                personID = authorMatch.group(0)
            urlParts = urlparse(queryString)
            if urlParts.netloc == "arxiv.org":
                fromUriPath = extractPapersFromArXivUriPath(urlParts.path)
                if fromUriPath is not None:
                    papers = [fromUriPath]

    outputformat = "html"
    if "outputformat" in form:
        requestedOutputformat = form["outputformat"].value
        if requestedOutputformat in ["html", "raw"]:
            outputformat = requestedOutputformat

    # The default format depends on the URI the script is reached through;
    # an explicit, known format field overrides it.
    format = "wiki"
    if isRunningFromBibTeXURI():
        format = "bibtex"
    elif isRunningFromHTMLURI():
        format = "html"
    if "format" in form:
        requestedFormat = form["format"].value
        if requestedFormat in ["wiki", "bibtex", "biblatex", "bibitem", "html"]:
            format = requestedFormat

    printAll(pageHead(queryString, format, outputformat))

    if hasQuery:
        failedIDs = []
        if personID == "":
            arXivIDs = []
            for paperID in papers:
                processedID = prepareArXivID(paperID)
                if processedID is not None:
                    arXivIDs += [processedID]
                else:
                    failedIDs += [paperID]
            arXivURL = "https://export.arxiv.org/api/query?id_list=" + ",".join(arXivIDs) + "&max_results=" + str(maxpapers)
        else:
            arXivURL = "https://arxiv.org/a/" + personID + ".atom"

        # A failed download or an unparsable reply is reported to the user
        # instead of ending the script with an unhandled exception.
        feed = None
        try:
            download = urllib.urlopen(arXivURL)
            try:
                downloadedData = download.read()
            finally:
                download.close()
        except IOError:
            downloadedData = None
        if downloadedData:
            try:
                feed = xml.etree.ElementTree.fromstring(downloadedData)
            except xml.etree.ElementTree.ParseError:
                feed = None

        if feed is None:
            printHtml(extraInfo(), outputformat)
            printHtml(errorMarkup("The arXiv data could not be retrieved."), outputformat)
        else:
            # Check for an error by looking at the title of the first paper:
            # errors are marked by 'Error', empty feeds don't have a title.
            # The test must be "is None": an element without children is falsy.
            firstTitle = feed.find(_ATOM_NS + "entry/" + _ATOM_NS + "title")
            if firstTitle is None or firstTitle.text == "Error":
                lookupSubject = "paper ID"
                if personID == "" and len(papers) > 1:
                    lookupSubject = "paper IDs"
                elif personID != "":
                    lookupSubject = "author ID"

                printHtml(extraInfo(), outputformat)
                printHtml(errorMarkup("The arXiv did not return any results for the " + lookupSubject + " you entered. Any chance there may be a typo in there?"), outputformat)
            else:
                # We got data and no error: process it.
                publications = []
                for paper in feed.iter(_ATOM_NS + "entry"):
                    publicationDict = _publicationFromEntry(paper)
                    if publicationDict is not None:
                        publications += [publicationDict]

                # Newest papers first.
                publications.sort(key=cmp_to_key(comparePaperDictionaries), reverse=True)

                preprints = [publication for publication in publications if publication["journal"] is None]
                published = [publication for publication in publications if publication["journal"] is not None]

                output = ["<div class='formatpicker'>Format:<ul class='outputtypes'>\n"]
                for typeID, typeLabel in [("bibtex", "BibTeX"), ("biblatex", "BibLaTeX"), ("bibitem", "\\bibitem"), ("html", "HTML"), ("wiki", "Wiki")]:
                    output += ["<li><a href='javascript:showType(\"" + typeID + "\");' id='" + typeID + "-link'>" + typeLabel + "</a></li>\n"]
                output += ["</ul>\n</div>\n"]

                # Warn only when IDs were really dropped (more than maxpapers).
                if len(papers) > maxpapers:
                    output += ["<div class='warning'>We can only process " + str(maxpapers) + " paper IDs at a time. " + str(len(papers) - maxpapers) + " of the IDs you entered were ignored.</div>"]

                output += _formatSection("bibtex", preprints, published, lambda items, kind: bibTeXMarkup(items, "bibtex"), _mathSciNetNote("BibTeX"))
                output += _formatSection("biblatex", preprints, published, lambda items, kind: bibTeXMarkup(items, "biblatex"), _mathSciNetNote("BibLaTeX"))
                output += _formatSection("bibitem", preprints, published, lambda items, kind: bibItemMarkup(items))
                output += _formatSection("html", preprints, published, htmlMarkup)
                output += _formatSection("wiki", preprints, published, wikiMarkup)

                if len(failedIDs) > 0:
                    # The failed IDs are raw user input and must be escaped
                    # before they are echoed into the page.
                    escapedIDs = [escapeHTML(failedID) for failedID in failedIDs]
                    if len(escapedIDs) == 1:
                        printHtml("""<div class="warning">No paper with the ID “""" + escapedIDs[0] + """” could be found on the arXiv.</div>\n""", outputformat)
                    else:
                        printHtml("""<div class="warning">The following paper IDs could not be found on the arXiv: """ + ", ".join(escapedIDs) + """.</div>\n""", outputformat)

                printHtml("".join(output), outputformat)
                printPublicationsRaw(publications, format, outputformat)
    else:
        printHtml(extraInfo(), outputformat)

    printHtml(pageFoot(), outputformat)
|
740 |
-
|
741 |
-
|
742 |
-
"""
|
743 |
-
MAIN SCRIPT *****************************************************************
|
744 |
-
"""
|
745 |
-
# form = cgi.FieldStorage()
|
746 |
-
# processCgi(form)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|