gagan3012 committed on
Commit
9b3d5c7
·
1 Parent(s): b1f46ad

Create lookup.py

Browse files
Files changed (1) hide show
  1. lookup.py +746 -0
lookup.py ADDED
@@ -0,0 +1,746 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ #coding=utf-8
3
+ """
4
+ arXivToBibTeX / arXivToWiki v7.2
5
+ ©2009-2020 Sven-S. Porst / earthlingsoft <[email protected]>
6
+
7
+ Service available at: https://arxiv2bibtex.org
8
+ Source code available at: https://github.com/ssp/arXivToBibTeX
9
+
10
+ Originally created for Courant Research Centre
11
+ ‘Higher Order Structures in Mathematics’ at the
12
+ Mathematics Institute at the University of Göttingen.
13
+
14
+ Links for form submission refer to the folder of the current path without a
15
+ further filename:
16
+ /?q=searchTerm
17
+
18
+ Your server setup (.htaccess file) needs to make sure that these requests are
19
+ redirected to the script.
20
+ """
21
+
22
+ # import cgi
23
+ import re
24
+ import urllib
25
+ from urlparse import urlparse
26
+ from xml.etree import ElementTree
27
+ import xml.etree
28
+ import os
29
+ import sys
30
+ reload(sys)
31
+ sys.setdefaultencoding("utf-8")
32
+
33
+ #for debugging
34
+ #import cgitb
35
+ #cgitb.enable()
36
+
37
# Upper bound passed as max_results to the arXiv API query.
maxpapers = 100

# Captures everything before a trailing version suffix such as "v4".
trailingRE = re.compile(r"(.*)v[0-9]*$")
# New-style ID: four digits, optional dot, four or more digits.
newStyleRE = re.compile(r"\d{4}\.?\d{4,}$")
# Exactly seven digits: an old-style number without its archive prefix.
sevenDigitsRE = re.compile(r"\d{7}$")
# Complete old-style ID such as "hep-th/0606123".
oldStyleIDRE = re.compile(r"[a-z-]+/\d{7}$")
# Either ID style, searched for inside a URL path.
paperIDRE = re.compile(r"([a-z-]+/\d{7}|\d{4}\.\d{4,})")


def prepareArXivID(ID):
    """
    Normalise a user-entered arXiv paper ID.

    First, strip potentially trailing version numbers like v4, then:
        0909.1234 or 1504.12345-style ID => return unchanged
        09091234 or 150412345-style ID => return 0909.1234 or 1504.12345
        0606123-style ID => return math/0606123
        non-math/0606123-style ID => return unchanged
        anything else => return None
    """
    myID = trailingRE.sub(r"\1", ID.strip())

    if newStyleRE.match(myID) is not None:
        # Only insert the dot when there is none yet. The previous check used
        # re.match(r"\.", ...), which tests the *first* character only and
        # could therefore add a second dot to an already dotted ID.
        if "." not in myID:
            myID = myID[:4] + "." + myID[4:]
        return myID

    if sevenDigitsRE.match(myID) is not None:
        # Just seven digits: prepend math/
        return "math/" + myID

    if oldStyleIDRE.match(myID) is not None:
        return myID

    return None
71
+
72
+
73
def extractPapersFromArXivUriPath(path):
    """
    An arXiv URL was entered, extract the paper ID from its path.

    Input: path - path component of an arXiv URL, e.g. "/abs/1705.12345".
    Output: the first old-style (math-ph/9902123) or new-style (1705.12345)
        ID that paperIDRE finds in the path, or None if there is none.
    """
    paperIDMatch = paperIDRE.search(path)
    if paperIDMatch is None:
        return None
    return paperIDMatch.group(1)
81
+
82
+
83
+
84
def printAll(output):
    """
    Print output to standard output, whatever the output format is.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(output)
86
+
87
+
88
+
89
def printHtml(html, outputformat):
    """
    Print html to standard output, but only when outputformat is "html".
    For any other output format nothing is printed.
    """
    if outputformat == "html":
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print(html)
92
+
93
+
94
+
95
+
96
def printPublicationsRaw(publications, format, outputformat):
    """
    Print the bare records for all publications, without any HTML page around them.

    Input: publications - list of publication dictionaries.
        format - "html", "bibtex", "biblatex", "bibitem" or "wiki";
            any other value prints nothing.
        outputformat - records are only printed when this is "raw".
    """
    if outputformat != "raw":
        return

    if format == "html":
        print("\n\n".join(basicMarkupForHTMLEditing(publication) for publication in publications))
    elif format == "bibtex" or format == "biblatex":
        print("\n\n".join(markupForBibTeXItem(publication, format) for publication in publications))
    elif format == "bibitem":
        print("\n\n".join(markupForBibItem(publication) for publication in publications))
    elif format == "wiki":
        # Wiki items are single lines, so a single newline separates them.
        print("\n".join(markupForWikiItem(publication) for publication in publications))
106
+
107
+
108
+
109
+
110
def escapeHTML(inputString):
    """
    Input: string
    Output: input string with < > & " replaced by their HTML character entities
        (on Python 3, html.escape additionally replaces ' by &#x27;)
    """
    # The module-level "import cgi" is commented out, so the former
    # cgi.escape call raised a NameError; cgi.escape is also gone from
    # Python 3.8+. Import locally and prefer the html module where available.
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2
    return escape(inputString, True)
116
+
117
+
118
+
119
def theForm(format, queryString):
    """
    Returns string with HTML for the search form.
    The form is pre-filled with the current query string and carries the
    current format in a hidden field. Both values are HTML-escaped before
    they are placed into the attribute values.
    """
    escapedQuery = escapeHTML(queryString)
    escapedFormat = escapeHTML(format)
    return '''
<form method="get" action="./">
<p>
<input type="text" name="q" class="q" autofocus="autofocus" placeholder="1510.01797 or courant_r_1" value="''' + escapedQuery + '''"/>
<input type="hidden" name="format" id="formatinput" value="''' + escapedFormat + '''"/>
<input type="submit" class="button" value="Retrieve Information"/>
</p>
</form>
'''
133
+
134
+
135
+
136
def outputformatToMimeType(outputformat):
    """
    Input: outputformat - name of an output format.
    Output: MIME type string: "text/html" for "html", "application/x-bibtex"
        for "bibtex" and "biblatex", "text/plain" for everything else.
    """
    if outputformat == "html":
        return "text/html"
    if outputformat in ("bibtex", "biblatex"):
        return "application/x-bibtex"
    return "text/plain"
143
+
144
+
145
+
146
def pageHead(queryString, format, outputformat):
    """
    Returns string with the http header and, unless outputformat is "raw",
    the top of the HTML markup including CSS, JavaScript and the search form.

    Input: queryString - current query, used to pre-fill the search form.
        format - record format shown initially; it is inserted verbatim into
            the inline JavaScript of the body's onload handler, so callers
            must only pass known format names.
        outputformat - "raw" yields just the Content-type header line.
    """
    if outputformat == "raw":
        # NOTE(review): the MIME type is derived from outputformat (always
        # "raw" here), not from format, so raw output is always text/plain.
        return "Content-type: " + outputformatToMimeType(outputformat) + "; charset=UTF-8\n"

    # The page title depends on the URI the script is reached through.
    if isRunningFromBibTeXURI():
        title = "arXiv To BibTeX"
    elif isRunningFromHTMLURI():
        title = "arXiv to HTML"
    else:
        title = "arXiv To Wiki"

    return """Content-type: text/html; charset=UTF-8

<!DOCTYPE html>
<html lang="en">
<head>
<title>""" + title + """</title>
<meta name='generator' content='arXiv to Wiki/BibTeX Converter, 2009-2015 by Sven-S. Porst ([email protected]).'/>
<meta name='description' content='Create BibTeX, HTML or Wiki markup for papers on the mathematics and physics preprint arXiv.'/>
<style>
* { margin: 0em; padding: 0em; }
body { width: 40em; font-family: Georgia, Times, serif; line-height: 141%; margin:auto; background: #eee;}
.clear { clear:both; }
#title { text-align:center; margin:3em 1em; }
p { margin: 0.5em 0em; }
a { text-decoration: none; color: #00d; }
a:hover { text-decoration: underline; color: #00f; }
a:visited { color: #606; }
a.editlink { color: #b00;}
h1 { font-size: 144%; margin: 0.5em;}
a h1 { color: #000; }
form { display:block; margin: 1em; }
form p { text-align:center; }
form input { font-size: 121%; }
form input.q { width: 60%; margin-bottom: 1em; }
form input.button { position:relative; bottom: 3px; }
h2 { font-size: 121%; margin:2em 0em 1em 0em; position:relative; }
h2:before { content: "\\002767"; position: absolute; width: 1em; left:-1em; font-size: 360%; color: #999; }
h2.error:before { content: "\\002718"; color: #f33; }
ul { padding-left: 2em; }
ul li { margin-bottom: 0.5em; }
.formatpicker { text-align: right; margin:1em 0em -1em 0em; }
.formatpicker ul { display: inline; list-style-type: none; padding: 0px; }
.formatpicker ul li { display: inline; margin-left: 0.5em; font-weight: normal; padding: 0em; }
.format { display: none; }
textarea { width:100%; }
.warning { font-style:italic; text-align:center; margin: 1em 0em; color: #900;}
#foot { font-size: 80%; font-style:italic; text-align: center; margin: 3em 0em 1em 0em; padding-top: 0.2em; border-top: #999 solid 1px; }
</style>
<script>
//<![CDATA[
function showType(type) {
var myTypes = ["bibtex", "biblatex", "bibitem", "html", "wiki"];
var myType = (!type || myTypes.indexOf(type) === -1) ? "wiki" : type;
document.getElementById("formatinput").value = myType;
for (var i = 0; i < myTypes.length; i++) {
var name = myTypes[i]
var linkID = name.concat("-link");
if (name === myType) {
document.getElementById(name).style.display = "block";
document.getElementById(linkID).style.fontWeight = "bold";
}
else {
document.getElementById(name).style.display = "none";
document.getElementById(linkID).style.fontWeight = "normal";
}
}
}
//]]>
</script>
</head>
<body onload="javascript:showType('""" + format + """');">
<div id="page">
<div id="title">
<h1><a href="./">Retrieve arXiv Information</a></h1>
</div>
""" + theForm(format, queryString)
225
+
226
+
227
+
228
+
229
def extraInfo():
    """
    Returns string with HTML explaining what to enter into the form:
    paper IDs, an arXiv author ID or an ORCID ID.
    Displayed beneath the search field on pages without results.
    """
    return """
<p>
Use the form above to get information for <a href="https://www.arxiv.org/">arXiv</a> submissions
for use in BibTeX, on web pages or in Wikis. You can enter:
</p>
<ul>
<li>
<p>
one or several <em>paper IDs</em> like “1510.01797” or “math/0506203”.
</p>
</li><li>
<p>
your <a href="https://arxiv.org/help/author_identifiers">arXiv <em>author ID</em></a>
looking similar to “grafvbothmer_h_1” to get a list of all your submitted papers.
</p>
</li>
<li>
<p>
your <a href="https://orcid.org">ORCID ID</a> looking similar to “0000-0003-0136-444X”
which you should register with your arXiv-account.
</p>
</li>
</ul>
"""
258
+
259
+
260
+
261
def pageFoot():
    """
    Returns string with HTML for the bottom of the page: the footer links
    followed by the closing tags for the page div, body and html elements.
    """
    return """<div id="foot">
Data from <a href="https://arxiv.org/help/api/index">arXiv API</a>
· Site by <a href="https://earthlingsoft.net/ssp">Sven-S. Porst</a>
· <a href="https://github.com/ssp/arXivToWiki/issues">Feedback</a>
</div>
</div>
</body>
</html>
"""
274
+
275
+
276
+
277
+
278
def htmlMarkup(items, type):
    """
    Input: items - List of publication dictionaries.
        type - "Preprint" or "Published".
    Output: Array of strings containing HTML markup for a textarea full of
        bibliographic information in (escaped) HTML markup.
        Empty array if there are no items.
    """
    if not items:
        return []

    # The list markup is shown as text inside the textarea, hence each
    # item's HTML is escaped.
    listMarkup = ["<ul>\n"]
    for item in items:
        listMarkup += ["<li>\n", escapeHTML(basicMarkupForHTMLEditing(item)), "\n</li>"]
    listMarkup += ["\n</ul>"]

    # Published items get one more textarea row each than preprints.
    rowsPerItem = 5 if type == "Published" else 4
    textareaStart = ["<textarea class='htmlinfo' cols='70' rows='", str(rowsPerItem * len(items) + 2), "'>\n"]
    return textareaStart + listMarkup + ["</textarea>\n"]
296
+
297
+
298
+
299
+
300
def basicMarkupForHTMLEditing(myDict):
    """
    Input: myDict - dictionary with publication data; the keys "authors",
        "title", "year", "journal", "link", "ID" and "DOI" are read.
    Output: String with HTML markup for publication data: authors, title,
        year, optional journal, a link to the arXiv page and optional
        DOI links. The values are inserted without HTML escaping.
    """
    output = [", ".join(myDict["authors"]), ': “', myDict["title"], '”, ', myDict["year"]]
    if myDict["journal"] is not None:
        output += [", ", myDict["journal"]]
    output += ["; <a href='", myDict["link"], "'>arXiv:", myDict["ID"], "</a>."]

    # "DOI" may be None or an empty list; both mean there is nothing to add.
    if myDict["DOI"]:
        dois = ["<a href='https://dx.doi.org/" + DOI + "'>" + DOI + "</a>" for DOI in myDict["DOI"]]
        output += [" DOI: ", ", ".join(dois), "."]

    return "".join(output)
320
+
321
+
322
+
323
+
324
def wikiMarkup(items, type):
    """
    Input: items - List of publication dictionaries.
        type - "Preprint" or "Published".
    Output: Array of strings containing HTML markup with an HTML preview list
        and a textarea full of bibliographic information in Wiki markup.
        Empty array if there are no items.
    """
    if not items:
        return []

    previewItems = [basicMarkupForHTMLEditing(item) for item in items]
    # One Wiki list item per publication, separated by blank lines.
    wikiText = "\n\n".join(markupForWikiItem(item) for item in items)

    # Published items get one more textarea row each than preprints.
    rowsPerItem = 4 if type == "Published" else 3
    return [
        "<p>Preview:</p>\n",
        "<ul><li>", "\n</li><li>".join(previewItems), "</li></ul>\n",
        "<p class='clear'>For copy and pasting to a Wiki:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='", str(rowsPerItem * len(items)), "'>\n",
        wikiText,
        "</textarea>\n"]
345
+
346
+
347
+
348
+
349
def markupForWikiItem(myDict):
    """
    Input: dictionary with publication data; the keys "authors", "title",
        "year", "journal", "link", "ID" and "DOI" are read.
    Output: Wiki markup for publication data: a single list item line in
        which every run of whitespace is collapsed to one space.
    """
    wikioutput = ["* ", ", ".join(myDict["authors"]), ': “', myDict["title"], '”, ', myDict["year"]]
    if myDict["journal"] is not None:
        wikioutput += [", ", myDict["journal"]]
    wikioutput += ["; [", myDict["link"], " arXiv:", myDict["ID"], "]."]

    # "DOI" may be None or an empty list; both mean there is nothing to add.
    if myDict["DOI"]:
        dois = ["[https://dx.doi.org/" + DOI + " " + DOI + "]" for DOI in myDict["DOI"]]
        wikioutput += [" DOI: ", ", ".join(dois), "."]

    # A Wiki list item has to stay on one line.
    return re.sub(r"\s+", r" ", "".join(wikioutput))
371
+
372
+
373
+
374
+
375
def bibTeXMarkup(items, format):
    """
    Input: items - List of publication dictionaries.
        format - "bibtex" or "biblatex", passed on to markupForBibTeXItem.
    Output: Array of strings containing HTML markup for a textarea full of
        BibTeX records. Empty array if there are no items.
    """
    if not items:
        return []

    itemmarkup = [markupForBibTeXItem(item, format) for item in items]
    # Rows: the lines of all records plus one blank line between records.
    linecount = sum(len(record.split("\n")) for record in itemmarkup)
    # NOTE(review): the records are inserted into the textarea unescaped.
    return ["<textarea class='wikiinfo' cols='70' rows='", str(linecount + len(items) - 1), "'>\n", "\n\n".join(itemmarkup), "</textarea>\n"]
390
+
391
+
392
+
393
def markupForBibTeXItem(myDict, format):
    """
    Input: myDict - dictionary with publication data; the keys "ID",
            "authors", "title", "year", "journal" and "DOI" are read.
        format - "biblatex" for a BibLaTeX record, anything else for BibTeX.
    Output: BibTeX record for the paper. Papers with a journal reference or
        a DOI become @article, others @online (BibLaTeX) or @misc (BibTeX).
    """
    bibTeXID = myDict["ID"]
    isBibLaTeX = format == "biblatex"

    hasDOI = bool(myDict["DOI"])
    hasJournal = myDict["journal"] is not None

    if hasJournal or hasDOI:
        publicationType = "@article"
    elif isBibLaTeX:
        publicationType = "@online"
    else:
        publicationType = "@misc"

    # BibLaTeX names the archive in a separate Eprinttype field instead of
    # prefixing the eprint number.
    eprintPrefix = "" if isBibLaTeX else "arXiv:"
    bibTeXEntry = [
        publicationType, "{", bibTeXID,
        ",\nAuthor = {", " and ".join(myDict["authors"]),
        "},\nTitle = {", myDict["title"],
        "},\nYear = {", myDict["year"],
        "},\nEprint = {", eprintPrefix, bibTeXID, "},\n"]
    if isBibLaTeX:
        bibTeXEntry += ["Eprinttype = {arXiv},\n"]
    if hasJournal:
        bibTeXEntry += ["Howpublished = {", myDict["journal"], "},\n"]
    if hasDOI:
        bibTeXEntry += ["Doi = {", " ".join(myDict["DOI"]), "},\n"]
    bibTeXEntry += ["}"]
    return "".join(bibTeXEntry)
420
+
421
+
422
+
423
def bibItemMarkup(items):
    """
    Input: List of publication dictionaries.
    Output: Array of strings containing HTML markup with a short note and a
        textarea full of \\bibitem commands wrapped in a thebibliography
        environment. Empty array if there are no items.
    """
    if not items:
        return []

    itemmarkup = [markupForBibItem(item) for item in items]
    linecount = sum(len(bibItem.split("\n")) for bibItem in itemmarkup)
    return [
        "<p>Simple-minded \\bibitems:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='", str(linecount + 3), "'>\\begin{thebibliography}\n\n",
        "\n".join(itemmarkup),
        # The backslash is written as "\\end": "\e" is not a valid escape
        # sequence and triggers a warning on current Python versions.
        "\n\\end{thebibliography}</textarea>\n"]
438
+
439
+
440
def markupForBibItem(myDict):
    """
    Input: dictionary with publication data; the keys "ID", "authors",
        "title", "year", "journal" and "DOI" are read.
    Output: LaTeX \\bibitem command for the publication, ending in a newline.
    """
    bibTeXID = myDict["ID"]
    authors = myDict["authors"]

    # "A", "A and B", "A, B and C"; empty string when there are no authors.
    if len(authors) > 1:
        authorString = ", ".join(authors[:-1]) + " and " + authors[-1]
    elif len(authors) == 1:
        authorString = authors[0]
    else:
        authorString = ""

    bibItemCommand = ["\\bibitem{", bibTeXID, "}\n", authorString, ".\n\\newblock ", myDict["title"], ", ", myDict["year"]]
    if myDict["journal"] is not None:
        bibItemCommand += [",\n\\newblock ", myDict["journal"]]
    bibItemCommand += [";\n\\newblock arXiv:", bibTeXID, "."]
    # "DOI" may be None or an empty list; both mean there is nothing to add.
    if myDict["DOI"]:
        bibItemCommand += ["\n\\newblock DOI: ", " ".join(myDict["DOI"]), "."]
    return "".join(bibItemCommand) + "\n"
466
+
467
+
468
+
469
+
470
+
471
def errorMarkup(errorText):
    """
    Return markup for the error text received: a "No results" heading, the
    error text in a paragraph and a paragraph linking to the issue tracker.
    The error text is inserted as is, so it has to be valid HTML already.
    """
    return """<h2 class="error">No results</h2>
<p>""" + errorText + """</p>
<p>If you think you entered a valid arXiv ID and you keep getting this error message, please accept our apologies and <a href="https://github.com/ssp/arXivToWiki/issues">let me know</a>.</p>
"""
479
+
480
+
481
+
482
def isRunningFromBibTeXURI():
    """
    Return True if the request URI or the host name contains "bibtex".
    """
    return isInRequestURI("bibtex")


def isRunningFromHTMLURI():
    """
    Return True if the request URI or the host name contains "html".
    """
    return isInRequestURI("html")


def isInRequestURI(string):
    """
    Return True if string occurs in the REQUEST_URI or HTTP_HOST
    environment variable. string has to be lower case.
    """
    return isInEnvironment("REQUEST_URI", string) or isInEnvironment("HTTP_HOST", string)


def isInEnvironment(fieldName, string):
    """
    Return True if the environment variable fieldName exists and its
    lower-cased value contains string, False otherwise.
    """
    return fieldName in os.environ and string in os.environ[fieldName].lower()
496
+
497
+
498
# Picks the digits before and after the optional dot of a paper ID and
# skips any non-digit prefix such as "math/".
IDCleanerRE = re.compile(r"[^0-9]*([0-9]*)\.?([0-9]*)")


def _numericPaperID(paperID):
    """Return the digits of paperID as an int, 0 if it contains no digits."""
    return int(IDCleanerRE.sub(r"\1\2", paperID) or 0)


def comparePaperDictionaries (firstPaper, secondPaper):
    """
    Compare paper dictionaries.
        Earlier years are smaller.
        Smaller IDs within a year are smaller.
    Returns -1, 0 or 1 like a classic comparison function. Papers are
    treated as equal (0) if one of them lacks the "year" or "ID" key.
    """
    def threeWay(first, second):
        # Replacement for the cmp() builtin, which Python 3 does not have.
        return (first > second) - (first < second)

    for key in ("year", "ID"):
        if key not in firstPaper or key not in secondPaper:
            return 0

    comparisonResult = threeWay(firstPaper["year"], secondPaper["year"])
    if comparisonResult == 0:
        comparisonResult = threeWay(_numericPaperID(firstPaper["ID"]), _numericPaperID(secondPaper["ID"]))
    return comparisonResult
516
+
517
+
518
+
519
# XML namespaces used in the Atom feeds returned by the arXiv.
_ATOM_NS = "{http://www.w3.org/2005/Atom}"
_ARXIV_NS = "{http://arxiv.org/schemas/atom}"


def _childText(element, tag, default=""):
    """Return the text of the first child named tag, or default if it is missing or empty."""
    child = element.find(tag)
    if child is None or child.text is None:
        return default
    return child.text


def _fetchArXivFeed(arXivURL):
    """Download arXivURL and return the parsed XML root element, or None if download or parsing fails."""
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    try:
        download = urlopen(arXivURL)
        try:
            downloadedData = download.read()
        finally:
            download.close()
        return ElementTree.fromstring(downloadedData)
    except (IOError, ElementTree.ParseError):
        return None


def _publicationFromEntry(paper):
    """Convert one Atom entry element into a publication dictionary; None if the entry has no title."""
    titleElement = paper.find(_ATOM_NS + "title")
    if titleElement is None:
        return None
    theTitle = re.sub(r"\s*\n\s*", r" ", titleElement.text or "")

    theAuthors = []
    for author in paper.iter(_ATOM_NS + "author"):
        name = _childText(author, _ATOM_NS + "name")
        if name:
            theAuthors += [name]

    thePDF = ""
    theLink = ""
    for link in paper.iter(_ATOM_NS + "link"):
        linktarget = link.attrib.get("href")
        if linktarget is None:
            # Skip links without target instead of reusing stale values.
            continue
        linktype = link.attrib.get("type")
        if linktype == "application/pdf":
            thePDF = linktarget
        elif linktype == "text/html":
            theLink = linktarget

    # Strip the version suffix from the ID. Splitting at the first "v"
    # would truncate archive names containing a "v" such as "solv-int".
    splitLink = theLink.split("/abs/")
    theID = re.sub(r"v[0-9]+$", "", splitLink[-1])
    theLink = splitLink[0] + "/abs/" + theID

    return {
        "ID": theID,
        "authors": theAuthors,
        "title": theTitle,
        "abstract": _childText(paper, _ATOM_NS + "summary").strip(),
        "year": _childText(paper, _ATOM_NS + "published").split("-")[0],
        "PDF": thePDF,
        "link": theLink,
        "DOI": [DOI.text for DOI in paper.iter(_ARXIV_NS + "doi") if DOI.text],
        "journal": _childText(paper, _ARXIV_NS + "journal_ref", None)}


def _formatSectionMarkup(divID, preprints, published, render, journalrefnote, publishedNote=None):
    """Return the markup list for one format's div with its Preprints and Published parts."""
    section = ["<div id='" + divID + "'>\n"]
    if len(preprints) > 0:
        section += ["<h2>Preprints:</h2>\n", journalrefnote]
        section += render(preprints, "Preprint")
    if len(published) > 0:
        section += ["<h2>Published:</h2>\n"]
        if publishedNote is not None:
            section += [publishedNote]
        section += render(published, "Published")
    section += ["</div>\n"]
    return section


def processCgi(form):
    """
    Handle one request: print the http header and the complete page (or,
    for outputformat "raw", just the bare records) to standard output.

    Input: form - mapping of request fields as provided by
        cgi.FieldStorage: supports "name in form" and form[name].value.
        Fields read:
            q - paper IDs separated by whitespace or commas, an arXiv URL,
                an arXiv author ID or an ORCID ID.
            outputformat - "html" (default) or "raw".
            format - "wiki", "bibtex", "biblatex", "bibitem" or "html".
    """
    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:
        from urlparse import urlparse  # Python 2
    from functools import cmp_to_key

    hasQuery = "q" in form
    queryString = ""
    papers = []
    personID = ""
    if hasQuery:
        queryString = form["q"].value
        papers = list(set(queryString.replace(",", " ").split()))
        if len(papers) == 1:
            # for a single entry matching a regex we have an arXiv or ORCID author ID
            # see https://arxiv.org/help/author_identifiers
            arxivAuthorIDRegex = r"([a-z]*_[a-z]_[0-9]*)"
            orcidIDRegex = r"((https://orcid.org/)?\d\d\d\d-\d\d\d\d-\d\d\d\d-\d\d\d[0-9X])"
            authorMatch = re.search(arxivAuthorIDRegex + "|" + orcidIDRegex, papers[0])
            if authorMatch is not None:
                personID = authorMatch.group(0)
            # An arXiv URL may have been entered instead of a plain ID.
            urlParts = urlparse(queryString)
            if urlParts.netloc == "arxiv.org":
                fromUriPath = extractPapersFromArXivUriPath(urlParts.path)
                if fromUriPath is not None:
                    papers = [fromUriPath]

    outputformat = "html"
    if "outputformat" in form:
        of = form["outputformat"].value
        if of in ["html", "raw"]:
            outputformat = of

    # The default format depends on the URI; an explicit valid field wins.
    format = "wiki"
    if isRunningFromBibTeXURI():
        format = "bibtex"
    elif isRunningFromHTMLURI():
        format = "html"
    if "format" in form:
        f = form["format"].value
        if f in ["wiki", "bibtex", "biblatex", "bibitem", "html"]:
            format = f

    printAll(pageHead(queryString, format, outputformat))

    if hasQuery:
        failedIDs = []
        if personID == "":
            arXivIDs = []
            for paperID in papers:
                processedID = prepareArXivID(paperID)
                if processedID is not None:
                    arXivIDs += [processedID]
                else:
                    failedIDs += [paperID]
            arXivURL = "https://export.arxiv.org/api/query?id_list=" + ",".join(arXivIDs) + "&max_results=" + str(maxpapers)
        else:
            arXivURL = "https://arxiv.org/a/" + personID + ".atom"

        feed = _fetchArXivFeed(arXivURL)
        if feed is None:
            printHtml(extraInfo(), outputformat)
            printHtml(errorMarkup("The arXiv data could not be retrieved."), outputformat)
        else:
            publications = []
            output = []

            # Check for an error by looking at the title of the first paper:
            # errors are marked by 'Error', empty feeds don't have a title
            firstTitle = feed.find(_ATOM_NS + "entry/" + _ATOM_NS + "title")
            if firstTitle is None or firstTitle.text == "Error":
                lookupSubject = "paper ID"
                if personID == "" and len(papers) > 1:
                    lookupSubject = "paper IDs"
                elif personID != "":
                    lookupSubject = "author ID"

                printHtml(extraInfo(), outputformat)
                printHtml(errorMarkup("The arXiv did not return any results for the " + lookupSubject + " you entered. Any chance there may be a typo in there?"), outputformat)
            else:
                # We got data and no error: Process it.
                for paper in feed.iter(_ATOM_NS + "entry"):
                    publicationDict = _publicationFromEntry(paper)
                    if publicationDict is not None:
                        publications += [publicationDict]

                # Newest papers first.
                publications.sort(key=cmp_to_key(comparePaperDictionaries), reverse=True)
                preprints = [publication for publication in publications if publication["journal"] is None]
                published = [publication for publication in publications if publication["journal"] is not None]

                output += ["<div class='formatpicker'>Format:<ul class='outputtypes'>\n"]
                for formatID, label in [("bibtex", "BibTeX"), ("biblatex", "BibLaTeX"), ("bibitem", "\\bibitem"), ("html", "HTML"), ("wiki", "Wiki")]:
                    output += ["<li><a href='javascript:showType(\"" + formatID + "\");' id='" + formatID + "-link'>" + label + "</a></li>\n"]
                output += ["</ul>\n</div>\n"]

                # Only warn when IDs were really dropped; ">=" reported
                # "0 of the IDs you entered were ignored" for exactly maxpapers IDs.
                if len(papers) > maxpapers:
                    output += ["<div class='warning'>We can only process " + str(maxpapers) + " paper IDs at a time. " + str(len(papers) - maxpapers) + " of the IDs you entered were ignored.</div>"]

                journalrefnote = """<p><em>Please <a class="editlink" href="https://arxiv.org/user/" title="Go to arXiv user page where you can edit the information stored for your papers.">add the journal reference and <abbr title="Document Object Identifier">DOI</abbr> for your papers as soon as they are published</a>.</em></p>"""
                mathSciNetNote = """<p>These %s records are based on arXiv information only. You may prefer getting the more detailed records provided by <a href="https://mathscinet.ams.org/mathscinet/">MathSciNet</a> instead.</p>\n"""

                output += _formatSectionMarkup("bibtex", preprints, published, lambda items, kind: bibTeXMarkup(items, "bibtex"), journalrefnote, mathSciNetNote % "BibTeX")
                output += _formatSectionMarkup("biblatex", preprints, published, lambda items, kind: bibTeXMarkup(items, "biblatex"), journalrefnote, mathSciNetNote % "BibLaTeX")
                output += _formatSectionMarkup("bibitem", preprints, published, lambda items, kind: bibItemMarkup(items), journalrefnote)
                output += _formatSectionMarkup("html", preprints, published, htmlMarkup, journalrefnote)
                output += _formatSectionMarkup("wiki", preprints, published, wikiMarkup, journalrefnote)

            if len(failedIDs) > 0:
                # The failed IDs are user input: escape them for the HTML page.
                escapedIDs = [escapeHTML(failedID) for failedID in failedIDs]
                if len(escapedIDs) == 1:
                    printHtml("""<div class="warning">No paper with the ID “""" + escapedIDs[0] + """” could be found on the arXiv.</div>\n""", outputformat)
                else:
                    printHtml("""<div class="warning">The following paper IDs could not be found on the arXiv: """ + ", ".join(escapedIDs) + """.</div>\n""", outputformat)

            printHtml("".join(output), outputformat)
            printPublicationsRaw(publications, format, outputformat)
    else:
        printHtml(extraInfo(), outputformat)

    printHtml(pageFoot(), outputformat)
740
+
741
+
742
+ """
743
+ MAIN SCRIPT *****************************************************************
744
+ """
745
+ # form = cgi.FieldStorage()
746
+ # processCgi(form)