gagan3012 committed on
Commit
0e908e7
·
1 Parent(s): c27d439

Delete lookup.py

Browse files
Files changed (1) hide show
  1. lookup.py +0 -746
lookup.py DELETED
@@ -1,746 +0,0 @@
1
- #!/usr/bin/env python
2
- #coding=utf-8
3
- """
4
- arXivToBibTeX / arXivToWiki v7.2
5
- ©2009-2020 Sven-S. Porst / earthlingsoft <ssp-web@earthlingsoft.net>
6
-
7
- Service available at: https://arxiv2bibtex.org
8
- Source code available at: https://github.com/ssp/arXivToBibTeX
9
-
10
- Originally created for Courant Research Centre
11
- ‘Higher Order Structures in Mathematics’ at the
12
- Mathematics Institute at the University of Göttingen.
13
-
14
- Links for form submission refer to the folder of the current path without a
15
- further filename:
16
- /?q=searchTerm
17
-
18
- Your server setup (.htaccess file) needs to make sure that these requests are
19
- redirected to the script.
20
- """
21
-
22
- # import cgi
23
import functools
import os
import re
import sys
import urllib
import xml.etree
from xml.etree import ElementTree

try:
    from urlparse import urlparse
except ImportError:  # Python 3: urlparse lives in urllib.parse
    from urllib.parse import urlparse

try:
    from urllib.request import urlopen
except ImportError:  # Python 2: urlopen lives directly in urllib
    from urllib import urlopen

try:
    from html import escape as _htmlEscape
except ImportError:  # Python 2: the equivalent helper is cgi.escape
    from cgi import escape as _htmlEscape
30
# Python 2 only: switch the implicit str/unicode conversion codec to UTF-8.
# Neither the reload() builtin nor sys.setdefaultencoding() exists on
# Python 3, where text is Unicode already, so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")
32
-
33
- #for debugging
34
- #import cgitb
35
- #cgitb.enable()
36
-
37
# Value sent as max_results to the arXiv API and used for the "too many IDs"
# warning.
maxpapers = 100

# Captures everything in front of a trailing version marker such as "v4".
trailingRE = re.compile(r"(.*)v[0-9]*$")
# New-style ID: four digits, an optional dot, at least four more digits.
newStyleRE = re.compile(r"\d{4}\.?\d{4,}$")
# Exactly seven digits: an old-style number without its archive prefix.
sevenDigitsRE = re.compile(r"\d{7}$")
# Old-style ID including the archive prefix, e.g. "math/0606123".
oldStyleIDRE = re.compile(r"[a-z-]+/\d{7}$")
# Either ID style; group 1 is the ID. Used to search inside URL paths.
paperIDRE = re.compile(r"([a-z-]+/\d{7}|\d{4}\.\d{4,})")


def prepareArXivID(ID):
    """
    Normalise a user-entered paper ID.

    First, strip surrounding whitespace and a potentially trailing version
    number like v4, then:
        0909.1234 or 1504.12345-style ID => return unchanged
        09091234 or 150412345-style ID => return 0909.1234 or 1504.12345
        0606123-style ID => return math/0606123
        non-math/0606123-style ID => return unchanged
        anything else => return None
    """
    myID = trailingRE.sub(r"\1", ID.strip())

    if newStyleRE.match(myID) is not None:
        # An 8+ digit number (new-style): insert the dot after the first four
        # digits in case it is not there already. An ID that already has its
        # dot must be left alone, however many digits follow it.
        if "." not in myID:
            myID = myID[:4] + "." + myID[4:]
        return myID

    if sevenDigitsRE.match(myID) is not None:
        # Just seven digits: prepend math/
        return "math/" + myID

    if oldStyleIDRE.match(myID) is not None:
        return myID

    return None
71
-
72
-
73
def extractPapersFromArXivUriPath(path):
    """
    An arXiv URL was entered: extract the paper ID from its path.

    Input: path component of the URL.
    Output: the first substring matching paperIDRE (old-style archive/1234567
            or new-style 1705.12345), or None if the path contains no ID.
    """
    paperIDMatch = paperIDRE.search(path)
    if paperIDMatch is None:
        return None
    return paperIDMatch.group(1)
81
-
82
-
83
-
84
def printAll(output):
    """
    Print output to standard output, whatever the output format is.
    """
    print(output)
86
-
87
-
88
-
89
def printHtml(HTML, output_format):
    """
    Print HTML to standard output, but only when output_format is "html".
    For any other output format nothing is printed.
    """
    if output_format == "html":
        print(HTML)
92
-
93
-
94
-
95
-
96
def printPublicationsRaw(publications, format, outputformat):
    """
    Print the bare records for all publications when outputformat is "raw".

    Input: publications - list of publication dictionaries.
           format - "html", "bibtex", "biblatex", "bibitem" or "wiki".
           outputformat - nothing is printed unless this is "raw".
    Records are separated by an empty line, except Wiki items which are
    separated by a single line break. Unknown formats print nothing.
    """
    if outputformat != "raw":
        return

    separator = "\n\n"
    if format == "html":
        records = [basicMarkupForHTMLEditing(publication) for publication in publications]
    elif format == "bibtex" or format == "biblatex":
        records = [markupForBibTeXItem(publication, format) for publication in publications]
    elif format == "bibitem":
        records = [markupForBibItem(publication) for publication in publications]
    elif format == "wiki":
        records = [markupForWikiItem(publication) for publication in publications]
        separator = "\n"
    else:
        return

    print(separator.join(records))
106
-
107
-
108
-
109
-
110
def escapeHTML(inputString):
    """
    Input: string
    Output: input string with < > & and quotes replaced by their HTML
            character entities

    Uses the module-level _htmlEscape import (html.escape, falling back to
    cgi.escape on Python 2): the cgi module is not imported by this file and
    cgi.escape is no longer available on current Python 3.
    """
    return _htmlEscape(inputString, True)
116
-
117
-
118
-
119
def theForm(format, queryString):
    """
    Returns string with HTML for the search form.

    Input: format - value for the hidden "format" field.
           queryString - current query, pre-filled into the text field.
    Both values are HTML-escaped before being placed into the attributes.
    """
    return '''
<form method="get" action="./">
<p>
<input type="text" name="q" class="q" autofocus="autofocus" placeholder="1510.01797 or courant_r_1" value="''' + escapeHTML(queryString) + '''"/>
<input type="hidden" name="format" id="formatinput" value="''' + escapeHTML(format) + '''"/>
<input type="submit" class="button" value="Retrieve Information"/>
</p>
</form>
'''
133
-
134
-
135
-
136
def outputformatToMimeType(outputformat):
    """
    Input: output format name.
    Output: MIME type string: "text/html" for "html", "application/x-bibtex"
            for "bibtex" and "biblatex", "text/plain" for everything else.
    """
    mimeTypes = {
        "html": "text/html",
        "bibtex": "application/x-bibtex",
        "biblatex": "application/x-bibtex",
    }
    return mimeTypes.get(outputformat, "text/plain")
143
-
144
-
145
-
146
def pageHead(queryString, format, outputformat):
    """
    Returns string with the http header and, for HTML output, the top of the
    HTML markup including CSS, JavaScript and the search form.

    Input: queryString - current query, passed on to the search form.
           format - record format selected when the page loads.
           outputformat - "raw" returns just the Content-type header line.
    """
    if outputformat == "raw":
        # NOTE(review): outputformat is always "raw" here, so the MIME type is
        # always text/plain - confirm whether format was meant to be passed.
        return "Content-type: " + outputformatToMimeType(outputformat) + "; charset=UTF-8\n"

    title = "arXiv To Wiki"
    if isRunningFromBibTeXURI():
        title = "arXiv To BibTeX"
    elif isRunningFromHTMLURI():
        title = "arXiv to HTML"

    # format ends up inside an HTML attribute containing JavaScript, so it is
    # escaped the same way theForm escapes it.
    return """Content-type: text/html; charset=UTF-8

<!DOCTYPE html>
<html lang="en">
<head>
<title>""" + title + """</title>
<meta name='generator' content='arXiv to Wiki/BibTeX Converter, 2009-2015 by Sven-S. Porst (ssp-web@earthlingsoft.net).'/>
<meta name='description' content='Create BibTeX, HTML or Wiki markup for papers on the mathematics and physics preprint arXiv.'/>
<style>
* { margin: 0em; padding: 0em; }
body { width: 40em; font-family: Georgia, Times, serif; line-height: 141%; margin:auto; background: #eee;}
.clear { clear:both; }
#title { text-align:center; margin:3em 1em; }
p { margin: 0.5em 0em; }
a { text-decoration: none; color: #00d; }
a:hover { text-decoration: underline; color: #00f; }
a:visited { color: #606; }
a.editlink { color: #b00;}
h1 { font-size: 144%; margin: 0.5em;}
a h1 { color: #000; }
form { display:block; margin: 1em; }
form p { text-align:center; }
form input { font-size: 121%; }
form input.q { width: 60%; margin-bottom: 1em; }
form input.button { position:relative; bottom: 3px; }
h2 { font-size: 121%; margin:2em 0em 1em 0em; position:relative; }
h2:before { content: "\\002767"; position: absolute; width: 1em; left:-1em; font-size: 360%; color: #999; }
h2.error:before { content: "\\002718"; color: #f33; }
ul { padding-left: 2em; }
ul li { margin-bottom: 0.5em; }
.formatpicker { text-align: right; margin:1em 0em -1em 0em; }
.formatpicker ul { display: inline; list-style-type: none; padding: 0px; }
.formatpicker ul li { display: inline; margin-left: 0.5em; font-weight: normal; padding: 0em; }
.format { display: none; }
textarea { width:100%; }
.warning { font-style:italic; text-align:center; margin: 1em 0em; color: #900;}
#foot { font-size: 80%; font-style:italic; text-align: center; margin: 3em 0em 1em 0em; padding-top: 0.2em; border-top: #999 solid 1px; }
</style>
<script>
//<![CDATA[
function showType(type) {
var myTypes = ["bibtex", "biblatex", "bibitem", "html", "wiki"];
var myType = (!type || myTypes.indexOf(type) === -1) ? "wiki" : type;
document.getElementById("formatinput").value = myType;
for (var i = 0; i < myTypes.length; i++) {
var name = myTypes[i]
var linkID = name.concat("-link");
if (name === myType) {
document.getElementById(name).style.display = "block";
document.getElementById(linkID).style.fontWeight = "bold";
}
else {
document.getElementById(name).style.display = "none";
document.getElementById(linkID).style.fontWeight = "normal";
}
}
}
//]]>
</script>
</head>
<body onload="javascript:showType('""" + escapeHTML(format) + """');">
<div id="page">
<div id="title">
<h1><a href="./">Retrieve arXiv Information</a></h1>
</div>
""" + theForm(format, queryString)
225
-
226
-
227
-
228
-
229
def extraInfo():
    """
    Returns string with HTML explaining what to enter into the form:
    paper IDs, an arXiv author ID or an ORCID ID.
    Displayed beneath the search field on pages without results.
    """
    return """
<p>
Use the form above to get information for <a href="https://www.arxiv.org/">arXiv</a> submissions
for use in BibTeX, on web pages or in Wikis. You can enter:
</p>
<ul>
<li>
<p>
one or several <em>paper IDs</em> like “1510.01797” or “math/0506203”.
</p>
</li><li>
<p>
your <a href="https://arxiv.org/help/author_identifiers">arXiv <em>author ID</em></a>
looking similar to “grafvbothmer_h_1” to get a list of all your submitted papers.
</p>
</li>
<li>
<p>
your <a href="https://orcid.org">ORCID ID</a> looking similar to “0000-0003-0136-444X”
which you should register with your arXiv-account.
</p>
</li>
</ul>
"""
258
-
259
-
260
-
261
def pageFoot():
    """
    Returns string with HTML for the bottom of the page: the footer links
    followed by the closing tags for the page div, body and html elements.
    """
    return """<div id="foot">
Data from <a href="https://arxiv.org/help/api/index">arXiv API</a>
· Site by <a href="https://earthlingsoft.net/ssp">Sven-S. Porst</a>
· <a href="https://github.com/ssp/arXivToWiki/issues">Feedback</a>
</div>
</div>
</body>
</html>
"""
274
-
275
-
276
-
277
-
278
def htmlMarkup(items, type):
    """
    Input: items - List of publication dictionaries.
           type - "Preprint" or "Published".
    Output: Array of strings containing HTML markup for a textarea full of
            bibliographic information in HTML markup. Empty for no items.
    """
    if len(items) == 0:
        return []

    # The list markup is meant to be read and copied as source code, so each
    # item is escaped before it goes into the textarea.
    listMarkup = ["<ul>\n"]
    for item in items:
        listMarkup += ["<li>\n", escapeHTML(basicMarkupForHTMLEditing(item)), "\n</li>"]
    listMarkup += ["\n</ul>"]

    # Rows reserved per item in the textarea; published items get one more.
    factor = 5 if type == "Published" else 4
    rows = str(factor * len(items) + 2)
    return ["<textarea class='htmlinfo' cols='70' rows='", rows, "'>\n"] + listMarkup + ["</textarea>\n"]
296
-
297
-
298
-
299
-
300
def basicMarkupForHTMLEditing(myDict):
    """
    Input: myDict - dictionary with publication data. Reads the keys
           "authors", "title", "year", "journal", "link", "ID" and "DOI".
    Output: String with HTML markup for publication data.

    All values are HTML-escaped: they come from the downloaded feed and may
    contain characters like & < or quotes which would otherwise produce
    broken markup or end the single-quoted href attributes.
    """
    def safe(text):
        return _htmlEscape(text, True)

    htmlauthors = [safe(author) for author in myDict["authors"]]
    output = [", ".join(htmlauthors), ': “', safe(myDict["title"]), '”, ', safe(myDict["year"])]
    if myDict["journal"] is not None:
        output += [", ", safe(myDict["journal"])]
    output += ["; <a href='", safe(myDict["link"]), "'>arXiv:", safe(myDict["ID"]), "</a>."]
    if myDict["DOI"]:
        dois = ["<a href='https://dx.doi.org/" + safe(DOI) + "'>" + safe(DOI) + "</a>" for DOI in myDict["DOI"]]
        output += [" DOI: ", ", ".join(dois), "."]

    return "".join(output)
320
-
321
-
322
-
323
-
324
def wikiMarkup(items, type):
    """
    Input: items - List of publication dictionaries.
           type - "Preprint" or "Published".
    Output: Array of strings containing HTML markup with a preview list and a
            textarea full of bibliographic information in Wiki markup.
            Empty for no items.
    """
    if len(items) == 0:
        return []

    wikiItems = [markupForWikiItem(item) for item in items]
    previewItems = [basicMarkupForHTMLEditing(item) for item in items]

    # Rows reserved per item in the textarea; published items get one more.
    factor = 4 if type == "Published" else 3
    return [
        "<p>Preview:</p>\n",
        "<ul><li>", "\n</li><li>".join(previewItems), "</li></ul>\n",
        "<p class='clear'>For copy and pasting to a Wiki:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='", str(factor * len(items)), "'>\n",
        # Items are separated by an empty line. The Wiki markup is plain text
        # for the textarea, so it is escaped like the HTML textarea content.
        escapeHTML("\n\n".join(wikiItems)),
        "</textarea>\n",
    ]
345
-
346
-
347
-
348
-
349
def markupForWikiItem(myDict):
    """
    Input: dictionary with publication data. Reads the keys "authors",
           "title", "year", "journal", "link", "ID" and "DOI".
    Output: Wiki markup for publication data: a single list item line in
            which every run of whitespace is collapsed to one space.
    """
    wikioutput = ["* ", ", ".join(myDict["authors"]), ': “', myDict["title"], '”, ', myDict["year"]]
    if myDict["journal"] is not None:
        wikioutput += [", ", myDict["journal"]]
    wikioutput += ["; [", myDict["link"], " arXiv:", myDict["ID"], "]."]
    if myDict["DOI"]:
        dois = ["[https://dx.doi.org/" + DOI + " " + DOI + "]" for DOI in myDict["DOI"]]
        wikioutput += [" DOI: ", ", ".join(dois), "."]

    return re.sub(r"\s+", r" ", "".join(wikioutput))
371
-
372
-
373
-
374
-
375
def bibTeXMarkup(items, format):
    """
    Input: items - List of publication dictionaries.
           format - "bibtex" or "biblatex", passed on to markupForBibTeXItem.
    Output: Array of strings containing HTML markup for a textarea full of
            BibTeX records. Empty for no items.
    """
    if len(items) == 0:
        return []

    itemmarkup = [markupForBibTeXItem(item, format) for item in items]
    linecount = sum(len(bibtexmarkup.split('\n')) for bibtexmarkup in itemmarkup)
    # One row per record line plus one row for each empty separator line.
    rows = str(linecount + len(items) - 1)
    # The records are plain text for the textarea, so they are escaped like
    # the HTML textarea content.
    return ["<textarea class='wikiinfo' cols='70' rows='", rows, "'>\n", escapeHTML("\n\n".join(itemmarkup)), "</textarea>\n"]
390
-
391
-
392
-
393
def markupForBibTeXItem(myDict, format):
    """
    Input: myDict - dictionary with publication data. Reads the keys "ID",
           "authors", "title", "year", "journal" and "DOI".
           format - "biblatex" for a BibLaTeX record, anything else for BibTeX.
    Output: BibTeX record for the preprint.

    Papers with a journal reference or a DOI become @article records.
    Everything else becomes @online for BibLaTeX and @misc for BibTeX.
    """
    bibTeXID = myDict["ID"]
    hasDOI = bool(myDict["DOI"])
    hasJournal = myDict["journal"] is not None
    isBibLaTeX = format == "biblatex"

    if hasJournal or hasDOI:
        publicationType = "@article"
    elif isBibLaTeX:
        publicationType = "@online"
    else:
        publicationType = "@misc"

    # BibLaTeX names the archive in a separate Eprinttype field instead of a
    # prefix on the ID.
    eprintPrefix = "" if isBibLaTeX else "arXiv:"
    bibTeXEntry = [
        publicationType, "{", bibTeXID,
        ",\nAuthor = {", " and ".join(myDict["authors"]),
        "},\nTitle = {", myDict["title"],
        "},\nYear = {", myDict["year"],
        "},\nEprint = {", eprintPrefix, bibTeXID, "},\n",
    ]
    if isBibLaTeX:
        bibTeXEntry += ["Eprinttype = {arXiv},\n"]
    if hasJournal:
        bibTeXEntry += ["Howpublished = {", myDict["journal"], "},\n"]
    if hasDOI:
        bibTeXEntry += ["Doi = {", " ".join(myDict["DOI"]), "},\n"]
    bibTeXEntry += ["}"]
    return "".join(bibTeXEntry)
420
-
421
-
422
-
423
def bibItemMarkup(items):
    """
    Input: List of publication dictionaries.
    Output: Array of strings containing HTML markup with a short note and a
            textarea full of \\bibitem commands wrapped in a thebibliography
            environment. Empty for no items.
    """
    if len(items) == 0:
        return []

    itemmarkup = [markupForBibItem(item) for item in items]
    linecount = sum(len(bibItem.split('\n')) for bibItem in itemmarkup)
    return [
        "<p>Simple-minded \\bibitems:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='", str(linecount + 3), "'>\\begin{thebibliography}\n\n",
        # The commands are plain text for the textarea, so they are escaped
        # like the HTML textarea content.
        escapeHTML("\n".join(itemmarkup)),
        "\n\\end{thebibliography}</textarea>\n",
    ]
438
-
439
-
440
def markupForBibItem(myDict):
    """
    Input: dictionary with publication data. Reads the keys "ID", "authors",
           "title", "year", "journal" and "DOI".
    Output: LaTeX \\bibitem command for the publication, ending in a line
            break. Authors are joined by commas with " and " in front of the
            last one.
    """
    bibTeXID = myDict["ID"]
    authors = myDict["authors"]
    if len(authors) > 1:
        authorString = ", ".join(authors[:-1]) + " and " + authors[-1]
    elif len(authors) == 1:
        authorString = authors[0]
    else:
        authorString = ""

    bibItemCommand = ["\\bibitem{", bibTeXID, "}\n", authorString, ".\n\\newblock ", myDict["title"], ", ", myDict["year"]]
    if myDict["journal"] is not None:
        bibItemCommand += [",\n\\newblock ", myDict["journal"]]
    bibItemCommand += [";\n\\newblock arXiv:", bibTeXID, "."]
    if myDict["DOI"]:
        bibItemCommand += ["\n\\newblock DOI: ", " ".join(myDict["DOI"]), "."]
    return "".join(bibItemCommand) + "\n"
466
-
467
-
468
-
469
-
470
-
471
def errorMarkup(errorText):
    """
    Return markup for the error text received: a "No results" heading, the
    error text in a paragraph and a paragraph linking to the issue tracker.

    errorText is inserted as it is, so it has to be valid HTML already.
    """
    return """<h2 class="error">No results</h2>
<p>""" + errorText + """</p>
<p>If you think you entered a valid arXiv ID and you keep getting this error message, please accept our apologies and <a href="https://github.com/ssp/arXivToWiki/issues">let me know</a>.</p>
"""
479
-
480
-
481
-
482
def isRunningFromBibTeXURI():
    """
    Returns True if the request URI or host name contains "bibtex".
    """
    return isInRequestURI("bibtex")
484
-
485
def isRunningFromHTMLURI():
    """
    Returns True if the request URI or host name contains "html".
    """
    return isInRequestURI("html")
487
-
488
def isInRequestURI(string):
    """
    Returns True if string occurs in the REQUEST_URI or the HTTP_HOST
    environment variable.
    """
    return isInEnvironment("REQUEST_URI", string) or isInEnvironment("HTTP_HOST", string)
490
-
491
def isInEnvironment(fieldName, string):
    """
    Input: fieldName - name of an environment variable.
           string - text to look for.
    Output: True if the environment variable exists and its lowercased value
            contains string, False otherwise.
    """
    value = os.environ.get(fieldName)
    return value is not None and string in value.lower()
496
-
497
-
498
# Replacing each match by its two groups keeps only the digits of a paper ID.
IDCleanerRE = re.compile(r"[^0-9]*([0-9]*)\.?([0-9]*)")


def _compareValues(first, second):
    """Three-way comparison returning -1, 0 or 1 (like Python 2's cmp)."""
    return (first > second) - (first < second)


def _numericPaperID(paperID):
    """Return the digits of paperID as an integer, 0 if there are none."""
    digits = IDCleanerRE.sub(r"\1\2", paperID)
    return int(digits) if digits else 0


def comparePaperDictionaries (firstPaper, secondPaper):
    """
    Compare paper dictionaries.
    Earlier years are smaller.
    Smaller IDs within a year are smaller.

    Output: -1, 0 or 1. Papers are treated as equal (0) unless both of them
            have a "year" and an "ID" key.
    """
    for paper in (firstPaper, secondPaper):
        if "year" not in paper or "ID" not in paper:
            return 0

    comparisonResult = _compareValues(firstPaper["year"], secondPaper["year"])
    if comparisonResult == 0:
        comparisonResult = _compareValues(_numericPaperID(firstPaper["ID"]), _numericPaperID(secondPaper["ID"]))
    return comparisonResult
516
-
517
-
518
-
519
_ATOM_NS = "{http://www.w3.org/2005/Atom}"
_ARXIV_NS = "{http://arxiv.org/schemas/atom}"


def _parseQuery(queryString):
    """
    Split the query at commas and whitespace.
    Output: (papers, personID) - the list of entered paper IDs without
            duplicates and the author ID ("" if the query is not one).
    """
    tokens = []
    for token in queryString.replace(",", " ").split():
        if token not in tokens:
            tokens.append(token)

    # for a single entry matching a regex we have an arXiv or ORCID author ID
    # see https://arxiv.org/help/author_identifiers
    # A leading https://orcid.org/ is not part of the match, so that only the
    # bare ORCID ID ends up in the arXiv author URL.
    personID = ""
    if len(tokens) == 1:
        authorMatch = re.search(r"[a-z]*_[a-z]_[0-9]*|\d{4}-\d{4}-\d{4}-\d{3}[0-9X]", tokens[0])
        if authorMatch is not None:
            personID = authorMatch.group(0)

    # Entries which are arxiv.org URLs are replaced by the paper ID in them.
    papers = []
    for token in tokens:
        try:
            urlParts = urlparse(token)
        except ValueError:
            urlParts = None
        if urlParts is not None and urlParts.netloc == "arxiv.org":
            fromUriPath = extractPapersFromArXivUriPath(urlParts.path)
            if fromUriPath is not None:
                token = fromUriPath
        papers.append(token)
    return papers, personID


def _downloadFeed(arXivURL):
    """Download and parse the Atom feed at arXivURL. None if that fails."""
    try:
        download = urlopen(arXivURL)
        try:
            downloadedData = download.read()
        finally:
            download.close()
        return ElementTree.fromstring(downloadedData)
    except (IOError, ElementTree.ParseError):
        return None


def _elementText(parent, tag, default=""):
    """Text of the first child of parent named tag, default if there is none."""
    element = parent.find(tag)
    if element is None or element.text is None:
        return default
    return element.text


def _publicationFromEntry(paper):
    """Turn an Atom entry into a publication dictionary. None without title."""
    if paper.find(_ATOM_NS + "title") is None:
        return None

    thePDF = ""
    theLink = ""
    for link in paper.iter(_ATOM_NS + "link"):
        linktarget = link.attrib.get("href")
        if linktarget is None:
            continue
        linktype = link.attrib.get("type")
        if linktype == "application/pdf":
            thePDF = linktarget
        elif linktype == "text/html":
            theLink = linktarget

    # The ID is the part of the link behind /abs/ without its version suffix.
    # Only a trailing v<digits> is removed: archive names like solv-int
    # contain a v themselves.
    splitLink = theLink.split("/abs/")
    theID = re.sub(r"v[0-9]+$", "", splitLink[-1])

    return {
        "ID": theID,
        "authors": [_elementText(author, _ATOM_NS + "name") for author in paper.iter(_ATOM_NS + "author")],
        "title": re.sub(r"\s*\n\s*", r" ", _elementText(paper, _ATOM_NS + "title")),
        "abstract": _elementText(paper, _ATOM_NS + "summary").strip(),
        "year": _elementText(paper, _ATOM_NS + "published").split('-')[0],
        "PDF": thePDF,
        "link": splitLink[0] + "/abs/" + theID,
        "DOI": [DOI.text for DOI in paper.iter(_ARXIV_NS + "doi") if DOI.text],
        "journal": _elementText(paper, _ARXIV_NS + "journal_ref", None)}


def _sectionMarkup(divID, preprints, published, render, journalrefnote, publishedNote=None):
    """Markup of the div for one format. render(items, type) creates the records."""
    section = ["<div id='" + divID + "'>\n"]
    if len(preprints) > 0:
        section += ["<h2>Preprints:</h2>\n", journalrefnote]
        section += render(preprints, "Preprint")
    if len(published) > 0:
        section += ["<h2>Published:</h2>\n"]
        if publishedNote is not None:
            section += [publishedNote]
        section += render(published, "Published")
    section += ["</div>\n"]
    return section


def _resultsMarkup(papers, preprints, published):
    """Markup for the format picker and the results in every format."""
    output = ["<div class='formatpicker'>Format:<ul class='outputtypes'>\n",
        """<li><a href='javascript:showType("bibtex");' id='bibtex-link'>BibTeX</a></li>\n""",
        """<li><a href='javascript:showType("biblatex");' id='biblatex-link'>BibLaTeX</a></li>\n""",
        """<li><a href='javascript:showType("bibitem");' id='bibitem-link'>\\bibitem</a></li>\n""",
        """<li><a href='javascript:showType("html");' id='html-link'>HTML</a></li>\n""",
        """<li><a href='javascript:showType("wiki");' id='wiki-link'>Wiki</a></li>\n""",
        "</ul>\n</div>\n"]

    # Only warn if IDs are actually dropped: exactly maxpapers IDs are fine.
    if len(papers) > maxpapers:
        output += ["<div class='warning'>We can only process " + str(maxpapers) + " paper IDs at a time. " + str(len(papers) - maxpapers) + " of the IDs you entered were ignored.</div>"]

    journalrefnote = """<p><em>Please <a class="editlink" href="https://arxiv.org/user/" title="Go to arXiv user page where you can edit the information stored for your papers.">add the journal reference and <abbr title="Document Object Identifier">DOI</abbr> for your papers as soon as they are published</a>.</em></p>"""
    mathSciNetNote = """<p>These %s records are based on arXiv information only. You may prefer getting the more detailed records provided by <a href="https://mathscinet.ams.org/mathscinet/">MathSciNet</a> instead.</p>\n"""

    output += _sectionMarkup("bibtex", preprints, published,
        lambda items, type: bibTeXMarkup(items, "bibtex"), journalrefnote, mathSciNetNote % "BibTeX")
    output += _sectionMarkup("biblatex", preprints, published,
        lambda items, type: bibTeXMarkup(items, "biblatex"), journalrefnote, mathSciNetNote % "BibLaTeX")
    output += _sectionMarkup("bibitem", preprints, published,
        lambda items, type: bibItemMarkup(items), journalrefnote)
    output += _sectionMarkup("html", preprints, published, htmlMarkup, journalrefnote)
    output += _sectionMarkup("wiki", preprints, published, wikiMarkup, journalrefnote)
    return output


def _printResults(papers, personID, format, outputformat):
    """Look up the papers or the author on the arXiv and print the results."""
    failedIDs = []
    if personID == "":
        arXivIDs = []
        for paperID in papers:
            processedID = prepareArXivID(paperID)
            if processedID is not None:
                arXivIDs += [processedID]
            else:
                failedIDs += [paperID]
        arXivURL = "https://export.arxiv.org/api/query?id_list=" + ",".join(arXivIDs) + "&max_results=" + str(maxpapers)
    else:
        arXivURL = "https://arxiv.org/a/" + personID + ".atom"

    feed = _downloadFeed(arXivURL)
    if feed is None:
        printHtml(extraInfo(), outputformat)
        printHtml(errorMarkup("The arXiv data could not be retrieved."), outputformat)
        return

    publications = []
    output = []

    # Check for an error by looking at the title of the first paper:
    # errors are marked by 'Error', empty feeds don't have a title
    firstTitle = feed.find(_ATOM_NS + "entry/" + _ATOM_NS + "title")
    if firstTitle is None or firstTitle.text == "Error":
        lookupSubject = "paper ID"
        if personID != "":
            lookupSubject = "author ID"
        elif len(papers) > 1:
            lookupSubject = "paper IDs"

        printHtml(extraInfo(), outputformat)
        printHtml(errorMarkup("The arXiv did not return any results for the " + lookupSubject + " you entered. Any chance there may be a typo in there?"), outputformat)
    else:
        # We got data and no error: Process it.
        for paper in feed.iter(_ATOM_NS + "entry"):
            publicationDict = _publicationFromEntry(paper)
            if publicationDict is not None:
                publications += [publicationDict]

        # Latest papers first.
        publications.sort(key=functools.cmp_to_key(comparePaperDictionaries), reverse=True)

        preprints = [publication for publication in publications if publication["journal"] is None]
        published = [publication for publication in publications if publication["journal"] is not None]
        output = _resultsMarkup(papers, preprints, published)

    # The failed IDs are text entered by the user: escape them for the page.
    if len(failedIDs) == 1:
        printHtml("""<div class="warning">No paper with the ID “""" + escapeHTML(failedIDs[0]) + """” could be found on the arXiv.</div>\n""", outputformat)
    elif len(failedIDs) > 1:
        printHtml("""<div class="warning">The following paper IDs could not be found on the arXiv: """ + ", ".join([escapeHTML(failedID) for failedID in failedIDs]) + """.</div>\n""", outputformat)

    printHtml("".join(output), outputformat)
    printPublicationsRaw(publications, format, outputformat)


def processCgi(form):
    """
    Handle a request and print the complete response to standard output.

    Input: form - the submitted form fields (written for cgi.FieldStorage:
           supports "name in form" and form[name].value). Fields used:
           q - paper IDs separated by commas or whitespace, arXiv URLs, or a
               single arXiv author ID / ORCID ID.
           outputformat - "html" (default) or "raw".
           format - "wiki", "bibtex", "biblatex", "bibitem" or "html". The
               default depends on the request URI.
    Without q the page just contains the form and its explanation.
    """
    queryString = ""
    papers = []
    personID = ""
    hasQuery = "q" in form
    if hasQuery:
        queryString = form["q"].value
        papers, personID = _parseQuery(queryString)

    outputformat = "html"
    if "outputformat" in form:
        of = form["outputformat"].value
        if of in ["html", "raw"]:
            outputformat = of

    format = "wiki"
    if isRunningFromBibTeXURI():
        format = "bibtex"
    elif isRunningFromHTMLURI():
        format = "html"
    if "format" in form:
        f = form["format"].value
        if f in ["wiki", "bibtex", "biblatex", "bibitem", "html"]:
            format = f

    printAll(pageHead(queryString, format, outputformat))

    if hasQuery:
        _printResults(papers, personID, format, outputformat)
    else:
        printHtml(extraInfo(), outputformat)

    printHtml(pageFoot(), outputformat)
740
-
741
-
742
- """
743
- MAIN SCRIPT *****************************************************************
744
- """
745
- # form = cgi.FieldStorage()
746
- # processCgi(form)