Spaces:
Running
Running
Create lookup.py
Browse files
lookup.py
ADDED
@@ -0,0 +1,746 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
#coding=utf-8
|
3 |
+
"""
|
4 |
+
arXivToBibTeX / arXivToWiki v7.2
|
5 |
+
©2009-2020 Sven-S. Porst / earthlingsoft <[email protected]>
|
6 |
+
|
7 |
+
Service available at: https://arxiv2bibtex.org
|
8 |
+
Source code available at: https://github.com/ssp/arXivToBibTeX
|
9 |
+
|
10 |
+
Originally created for Courant Research Centre
|
11 |
+
‘Higher Order Structures in Mathematics’ at the
|
12 |
+
Mathematics Institute at the University of Göttingen.
|
13 |
+
|
14 |
+
Links for form submission refer to the folder of the current path without a
|
15 |
+
further filename:
|
16 |
+
/?q=searchTerm
|
17 |
+
|
18 |
+
Your server setup (.htaccess file) needs to make sure that these requests are
|
19 |
+
redirected to the script.
|
20 |
+
"""
|
21 |
+
|
22 |
+
# import cgi
|
23 |
+
import re
|
24 |
+
import urllib
|
25 |
+
from urlparse import urlparse
|
26 |
+
from xml.etree import ElementTree
|
27 |
+
import xml.etree
|
28 |
+
import os
|
29 |
+
import sys
|
30 |
+
reload(sys)
|
31 |
+
sys.setdefaultencoding("utf-8")
|
32 |
+
|
33 |
+
#for debugging
|
34 |
+
#import cgitb
|
35 |
+
#cgitb.enable()
|
36 |
+
|
37 |
+
# Value sent as max_results to the arXiv API and used for the "too many IDs" warning.
maxpapers = 100

# Trailing version suffix ("v" followed by digits) at the end of an ID.
trailingRE = re.compile(r"(.*)v[0-9]*$")
# New-style ID: four digits, an optional dot, then four or more digits.
newStyleRE = re.compile(r"\d{4}\.?\d{4,}$")
# Bare seven digit number (old-style ID without its archive prefix).
sevenDigitsRE = re.compile(r"\d{7}$")
# Old-style ID including the archive prefix, e.g. hep-th/0606123.
oldStyleIDRE = re.compile(r"[a-z-]+/\d{7}$")
# Either ID style, used to find an ID anywhere inside a URL path.
paperIDRE = re.compile(r"([a-z-]+/\d{7}|\d{4}\.\d{4,})")


def prepareArXivID(ID):
    """
    Normalise a user supplied arXiv paper ID.

    Surrounding whitespace and a trailing version number like v4 are
    stripped first, then:
        0909.1234 or 1504.12345-style ID => returned unchanged
        09091234 or 150412345-style ID => returned as 0909.1234 or 1504.12345
        0606123-style ID => returned as math/0606123
        non-math/0606123-style ID => returned unchanged
        anything else => None

    Input: ID - string as entered by the user.
    Output: normalised ID string, or None if the input is not recognised.
    """
    myID = trailingRE.sub(r"\1", ID.strip())

    if newStyleRE.match(myID) is not None:
        # The previous check used re.match(r"\.", ...), which only looks at
        # the first character and therefore never saw the dot. IDs that
        # already had a dot and eight or more trailing digits got a second
        # dot inserted. Test for the dot anywhere in the ID instead.
        if "." not in myID:
            # newStyleRE guarantees the ID consists of 8+ digits here, so
            # the dot always belongs after the fourth digit.
            myID = myID[:4] + "." + myID[4:]
        return myID

    if sevenDigitsRE.match(myID) is not None:
        # Just seven digits: assume the math archive.
        return "math/" + myID

    if oldStyleIDRE.match(myID) is not None:
        return myID

    return None
|
71 |
+
|
72 |
+
|
73 |
+
def extractPapersFromArXivUriPath(path):
    """
    Find a paper ID inside the path component of an arXiv URL.

    Input: path - string, the path part of the URL.
    Output: the first substring of path matched by paperIDRE
            (old archive/1234567 or new 1705.12345 style),
            or None if the path contains no such substring.
    """
    found = paperIDRE.search(path)
    if found is None:
        return None
    return found.group(1)
|
81 |
+
|
82 |
+
|
83 |
+
|
84 |
+
def printAll(output):
    """
    Write output to standard output, whatever the output format is.
    """
    # With a single argument the parenthesised form prints exactly what the
    # print statement does.
    print(output)
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
def printHtml(html, outputformat):
    """
    Write html to standard output, but only when outputformat is "html".

    For any other output format nothing is printed.
    """
    if outputformat != "html":
        return
    print(html)
|
92 |
+
|
93 |
+
|
94 |
+
|
95 |
+
|
96 |
+
def printPublicationsRaw(publications, format, outputformat):
    """
    Print the publications as plain markup when outputformat is "raw".

    Input: publications - list of publication dictionaries.
           format - "html", "bibtex", "biblatex", "bibitem" or "wiki".
           outputformat - nothing is printed unless this is "raw".
    Nothing is printed for an unknown format either.
    """
    if outputformat != "raw":
        return

    if format == "html":
        separator = "\n\n"
        render = basicMarkupForHTMLEditing
    elif format == "bibtex" or format == "biblatex":
        separator = "\n\n"
        render = lambda entry: markupForBibTeXItem(entry, format)
    elif format == "bibitem":
        separator = "\n\n"
        render = markupForBibItem
    elif format == "wiki":
        # Wiki items are separated by a single newline only.
        separator = "\n"
        render = markupForWikiItem
    else:
        return

    print(separator.join([render(entry) for entry in publications]))
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
|
110 |
+
def escapeHTML(inputString):
    """
    Input: string
    Output: input string with < > & " replaced by their HTML character entities
    """
    # The cgi module is not imported in this file (its import is commented
    # out), so calling cgi.escape raised a NameError. The same four
    # replacements are done by hand instead.
    # "&" has to be replaced first so the entities inserted afterwards are
    # not escaped a second time.
    escaped = inputString.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;")
    escaped = escaped.replace(">", "&gt;")
    escaped = escaped.replace('"', "&quot;")
    return escaped
|
116 |
+
|
117 |
+
|
118 |
+
|
119 |
+
def theForm(format, queryString):
    """
    Build the HTML for the search form.

    Input: format - value for the hidden "format" field.
           queryString - value pre-filled into the text field.
    Output: string with the form markup; both values are passed through
            escapeHTML before being inserted.
    """
    queryValue = escapeHTML(queryString)
    formatValue = escapeHTML(format)
    pieces = [
        '\n<form method="get" action="./">\n<p>\n'
        '<input type="text" name="q" class="q" autofocus="autofocus" placeholder="1510.01797 or courant_r_1" value="',
        queryValue,
        '"/>\n<input type="hidden" name="format" id="formatinput" value="',
        formatValue,
        '"/>\n<input type="submit" class="button" value="Retrieve Information"/>\n</p>\n</form>\n',
    ]
    return "".join(pieces)
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
def outputformatToMimeType(outputformat):
    """
    Map a format name to the MIME type used in the Content-type header.

    Output: "text/html" for "html", "application/x-bibtex" for "bibtex"
            and "biblatex", "text/plain" for everything else.
    """
    if outputformat == "html":
        return "text/html"
    if outputformat in ("bibtex", "biblatex"):
        return "application/x-bibtex"
    return "text/plain"
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
def pageHead(queryString, format, outputformat):
    """
    Build the start of the response.

    Input: queryString - current query, pre-filled into the search form.
           format - markup format shown initially; it is inserted into the
                    onload JavaScript call as-is.
           outputformat - "raw" gives just the HTTP header line, anything
                    else gives the header plus the top of the HTML page.
    Output: string with the http header and, for non-raw output, the HTML
            markup up to and including the search form, with CSS and JavaScript.
    """
    if outputformat == "raw":
        # NOTE(review): outputformat is always "raw" here, so the MIME type
        # lookup yields "text/plain"; possibly format was meant - confirm.
        mimeType = outputformatToMimeType(outputformat)
        return "Content-type: " + mimeType + "; charset=UTF-8\n"

    # The page title follows the URI or host the script was reached through.
    if isRunningFromBibTeXURI():
        title = "arXiv To BibTeX"
    elif isRunningFromHTMLURI():
        title = "arXiv to HTML"
    else:
        title = "arXiv To Wiki"

    # The empty line after the Content-type header ends the HTTP headers.
    beforeTitle = """Content-type: text/html; charset=UTF-8

<!DOCTYPE html>
<html lang="en">
<head>
<title>"""

    afterTitle = """</title>
<meta name='generator' content='arXiv to Wiki/BibTeX Converter, 2009-2015 by Sven-S. Porst ([email protected]).'/>
<meta name='description' content='Create BibTeX, HTML or Wiki markup for papers on the mathematics and physics preprint arXiv.'/>
<style>
* { margin: 0em; padding: 0em; }
body { width: 40em; font-family: Georgia, Times, serif; line-height: 141%; margin:auto; background: #eee;}
.clear { clear:both; }
#title { text-align:center; margin:3em 1em; }
p { margin: 0.5em 0em; }
a { text-decoration: none; color: #00d; }
a:hover { text-decoration: underline; color: #00f; }
a:visited { color: #606; }
a.editlink { color: #b00;}
h1 { font-size: 144%; margin: 0.5em;}
a h1 { color: #000; }
form { display:block; margin: 1em; }
form p { text-align:center; }
form input { font-size: 121%; }
form input.q { width: 60%; margin-bottom: 1em; }
form input.button { position:relative; bottom: 3px; }
h2 { font-size: 121%; margin:2em 0em 1em 0em; position:relative; }
h2:before { content: "\\002767"; position: absolute; width: 1em; left:-1em; font-size: 360%; color: #999; }
h2.error:before { content: "\\002718"; color: #f33; }
ul { padding-left: 2em; }
ul li { margin-bottom: 0.5em; }
.formatpicker { text-align: right; margin:1em 0em -1em 0em; }
.formatpicker ul { display: inline; list-style-type: none; padding: 0px; }
.formatpicker ul li { display: inline; margin-left: 0.5em; font-weight: normal; padding: 0em; }
.format { display: none; }
textarea { width:100%; }
.warning { font-style:italic; text-align:center; margin: 1em 0em; color: #900;}
#foot { font-size: 80%; font-style:italic; text-align: center; margin: 3em 0em 1em 0em; padding-top: 0.2em; border-top: #999 solid 1px; }
</style>
<script>
//<![CDATA[
function showType(type) {
var myTypes = ["bibtex", "biblatex", "bibitem", "html", "wiki"];
var myType = (!type || myTypes.indexOf(type) === -1) ? "wiki" : type;
document.getElementById("formatinput").value = myType;
for (var i = 0; i < myTypes.length; i++) {
var name = myTypes[i]
var linkID = name.concat("-link");
if (name === myType) {
document.getElementById(name).style.display = "block";
document.getElementById(linkID).style.fontWeight = "bold";
}
else {
document.getElementById(name).style.display = "none";
document.getElementById(linkID).style.fontWeight = "normal";
}
}
}
//]]>
</script>
</head>
<body onload="javascript:showType('"""

    afterFormat = """');">
<div id="page">
<div id="title">
<h1><a href="./">Retrieve arXiv Information</a></h1>
</div>
"""

    return beforeTitle + title + afterTitle + format + afterFormat + theForm(format, queryString)
|
225 |
+
|
226 |
+
|
227 |
+
|
228 |
+
|
229 |
+
def extraInfo():
    """
    Build the HTML that explains what can be entered into the form.

    Output: string with a paragraph and a list describing paper IDs,
            arXiv author IDs and ORCID IDs.
    """
    lines = [
        '',
        '<p>',
        'Use the form above to get information for <a href="https://www.arxiv.org/">arXiv</a> submissions',
        'for use in BibTeX, on web pages or in Wikis. You can enter:',
        '</p>',
        '<ul>',
        '<li>',
        '<p>',
        'one or several <em>paper IDs</em> like “1510.01797” or “math/0506203”.',
        '</p>',
        '</li><li>',
        '<p>',
        'your <a href="https://arxiv.org/help/author_identifiers">arXiv <em>author ID</em></a>',
        'looking similar to “grafvbothmer_h_1” to get a list of all your submitted papers.',
        '</p>',
        '</li>',
        '<li>',
        '<p>',
        'your <a href="https://orcid.org">ORCID ID</a> looking similar to “0000-0003-0136-444X”',
        'which you should register with your arXiv-account.',
        '</p>',
        '</li>',
        '</ul>',
        '',
    ]
    return "\n".join(lines)
|
258 |
+
|
259 |
+
|
260 |
+
|
261 |
+
def pageFoot():
    """
    Build the HTML for the bottom of the page.

    Output: string with the footer links followed by the closing tags of
            the page, body and html elements.
    """
    lines = [
        '<div id="foot">',
        'Data from <a href="https://arxiv.org/help/api/index">arXiv API</a>',
        '· Site by <a href="https://earthlingsoft.net/ssp">Sven-S. Porst</a>',
        '· <a href="https://github.com/ssp/arXivToWiki/issues">Feedback</a>',
        '</div>',
        '</div>',
        '</body>',
        '</html>',
        '',
    ]
    return "\n".join(lines)
|
274 |
+
|
275 |
+
|
276 |
+
|
277 |
+
|
278 |
+
def htmlMarkup(items, type):
    """
    Wrap the HTML markup for a group of publications in a textarea.

    Input: items - List of publication dictionaries.
           type - "Published" makes the textarea taller per item than any
                  other value.
    Output: List of strings forming a textarea whose content is an escaped
            HTML list with one entry per publication; empty list if items
            is empty.
    """
    if len(items) == 0:
        return []

    listLines = ["<ul>\n"]
    for entry in items:
        # The markup is shown as source code for copying, hence the escaping.
        listLines.extend(["<li>\n", escapeHTML(basicMarkupForHTMLEditing(entry)), "\n</li>"])
    listLines.append("\n</ul>")

    rowsPerItem = 5 if type == "Published" else 4
    opening = ["<textarea class='htmlinfo' cols='70' rows='", str(rowsPerItem * len(items) + 2), "'>\n"]
    return opening + listLines + ["</textarea>\n"]
|
296 |
+
|
297 |
+
|
298 |
+
|
299 |
+
|
300 |
+
def basicMarkupForHTMLEditing(myDict):
    """
    Build the HTML markup for a single publication.

    Input: myDict - dictionary with publication data; the keys "authors",
                    "title", "year", "journal", "link", "ID" and "DOI"
                    are read.
    Output: String with HTML markup for publication data: authors, title,
            year, optional journal, a link to the arXiv page and, if
            present, links for the DOIs.

    All values are escaped before being inserted. They come from the arXiv
    feed and were previously inserted verbatim, so a title containing "&"
    or "<" produced broken markup (this string is also inserted directly
    into the page as a preview).
    """
    def escapeText(text):
        # Escape the characters which are special in HTML text content.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def escapeAttribute(text):
        # The attributes below are delimited by single quotes, so these need
        # escaping in addition to the text characters.
        return escapeText(text).replace("'", "&#39;")

    authorNames = [escapeText(author) for author in myDict["authors"]]
    output = [", ".join(authorNames), ': “', escapeText(myDict["title"]), '”, ', escapeText(myDict["year"])]

    if myDict["journal"] is not None:
        output += [", ", escapeText(myDict["journal"])]

    output += ["; <a href='", escapeAttribute(myDict["link"]), "'>arXiv:", escapeText(myDict["ID"]), "</a>."]

    if myDict["DOI"]:
        dois = []
        for DOI in myDict["DOI"]:
            dois.append("<a href='https://dx.doi.org/" + escapeAttribute(DOI) + "'>" + escapeText(DOI) + "</a>")
        output += [" DOI: ", ", ".join(dois), "."]

    return "".join(output)
|
320 |
+
|
321 |
+
|
322 |
+
|
323 |
+
|
324 |
+
def wikiMarkup(items, type):
    """
    Build a preview list and a Wiki markup textarea for a group of publications.

    Input: items - List of publication dictionaries.
           type - "Published" makes the textarea taller per item than any
                  other value.
    Output: List of strings containing an HTML preview list followed by a
            textarea full of bibliographic information in Wiki markup;
            empty list if items is empty.
    """
    if len(items) == 0:
        return []

    wikiLines = []
    previewItems = []
    for entry in items:
        wikiLines.extend([markupForWikiItem(entry), "\n\n"])
        previewItems.append(basicMarkupForHTMLEditing(entry))

    # Drop the newlines after the final item.
    wikiLines[-1] = wikiLines[-1].strip("\n")

    rowsPerItem = 4 if type == "Published" else 3
    opening = [
        "<p>Preview:</p>\n",
        "<ul><li>",
        "\n</li><li>".join(previewItems),
        "</li></ul>\n",
        "<p class='clear'>For copy and pasting to a Wiki:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='",
        str(rowsPerItem * len(items)),
        "'>\n",
    ]
    return opening + wikiLines + ["</textarea>\n"]
|
345 |
+
|
346 |
+
|
347 |
+
|
348 |
+
|
349 |
+
def markupForWikiItem(myDict):
    """
    Build the Wiki markup for a single publication.

    Input: myDict - dictionary with publication data; the keys "authors",
                    "title", "year", "journal", "link", "ID" and "DOI"
                    are read.
    Output: one line of Wiki markup (a list item with external links);
            every run of whitespace is collapsed to a single space.
    """
    authorNames = ", ".join(list(myDict["authors"]))
    pieces = ["* ", authorNames, ': “', myDict["title"], '”, ', myDict["year"]]

    journal = myDict["journal"]
    if journal is not None:
        pieces.extend([", ", journal])

    pieces.extend(["; [", myDict["link"], " arXiv:", myDict["ID"], "]."])

    doiList = myDict["DOI"]
    if doiList is not None and len(doiList) > 0:
        doiLinks = ["[https://dx.doi.org/" + doi + " " + doi + "]" for doi in doiList]
        pieces.extend([" DOI: ", ", ".join(doiLinks), "."])

    return re.sub(r"\s+", r" ", "".join(pieces))
|
371 |
+
|
372 |
+
|
373 |
+
|
374 |
+
|
375 |
+
def bibTeXMarkup(items, format):
    """
    Wrap the BibTeX records for a group of publications in a textarea.

    Input: items - List of publication dictionaries.
           format - passed on to markupForBibTeXItem for every item.
    Output: List of strings forming a textarea which contains the records
            separated by empty lines; empty list if items is empty.
    """
    if len(items) == 0:
        return []

    records = [markupForBibTeXItem(entry, format) for entry in items]

    # One row per line of every record plus one row per separating empty line.
    rowCount = sum([record.count('\n') + 1 for record in records]) + len(items) - 1

    return [
        "<textarea class='wikiinfo' cols='70' rows='",
        str(rowCount),
        "'>\n",
        "\n\n".join(records),
        "</textarea>\n",
    ]
|
390 |
+
|
391 |
+
|
392 |
+
|
393 |
+
def markupForBibTeXItem(myDict, format):
    """
    Build the BibTeX or BibLaTeX record for a single publication.

    Input: myDict - dictionary with publication data; the keys "ID",
                    "authors", "title", "year", "DOI" and "journal"
                    are read.
           format - "biblatex" selects the BibLaTeX flavour, any other
                    value the BibTeX one.
    Output: string with the record. Entries with a journal reference or a
            DOI are typed @article, all others @online (BibLaTeX) or
            @misc (BibTeX).
    """
    entryID = myDict["ID"]
    authorList = " and ".join(myDict["authors"])
    title = myDict["title"]
    year = myDict["year"]

    doiList = myDict["DOI"]
    hasDOI = doiList is not None and len(doiList) > 0
    hasJournal = myDict["journal"] is not None
    forBibLaTeX = format == "biblatex"

    if hasJournal or hasDOI:
        publicationType = "@article"
    elif forBibLaTeX:
        publicationType = "@online"
    else:
        publicationType = "@misc"

    # BibLaTeX gets the bare ID plus a separate Eprinttype field instead of
    # the "arXiv:" prefix.
    eprintPrefix = "" if forBibLaTeX else "arXiv:"

    fields = [
        publicationType, "{", entryID,
        ",\nAuthor = {", authorList,
        "},\nTitle = {", title,
        "},\nYear = {", year,
        "},\nEprint = {", eprintPrefix, entryID, "},\n",
    ]
    if forBibLaTeX:
        fields.append("Eprinttype = {arXiv},\n")
    if hasJournal:
        fields.extend(["Howpublished = {", myDict["journal"], "},\n"])
    if hasDOI:
        fields.extend(["Doi = {", " ".join(doiList), "},\n"])
    fields.append("}")

    return "".join(fields)
|
420 |
+
|
421 |
+
|
422 |
+
|
423 |
+
def bibItemMarkup(items):
    """
    Wrap the \\bibitem commands for a group of publications in a textarea.

    Input: items - List of publication dictionaries.
    Output: List of strings forming a short heading and a textarea which
            contains a thebibliography environment full of \\bibitem
            commands; empty list if items is empty.
    """
    if len(items) == 0:
        return []

    itemmarkup = [markupForBibItem(item) for item in items]
    linecount = sum([len(bibItem.split('\n')) for bibItem in itemmarkup])

    return [
        "<p>Simple-minded \\bibitems:</p>\n",
        "<textarea class='wikiinfo' cols='70' rows='",
        # Three extra rows for the \begin line, the empty line after it and
        # the \end line.
        str(linecount + 3),
        "'>\\begin{thebibliography}\n\n",
        "\n".join(itemmarkup),
        # The backslash is doubled: "\e" is not a valid escape sequence and
        # only worked because unknown escapes are passed through.
        "\n\\end{thebibliography}</textarea>\n",
    ]
|
438 |
+
|
439 |
+
|
440 |
+
def markupForBibItem(myDict):
    """
    Build the LaTeX \\bibitem command for a single publication.

    Input: myDict - dictionary with publication data; the keys "ID",
                    "authors", "title", "year", "journal" and "DOI"
                    are read.
    Output: string with the \\bibitem command, ending in a newline. Authors
            are separated by commas, with "and" before the last one.
    """
    entryID = myDict["ID"]
    authors = myDict["authors"]

    if len(authors) > 1:
        authorString = ", ".join(authors[:-1]) + " and " + authors[-1]
    elif len(authors) == 1:
        authorString = authors[0]
    else:
        authorString = ""

    title = myDict["title"]
    year = myDict["year"]

    parts = ["\\bibitem{", entryID, "}\n", authorString, ".\n\\newblock ", title, ", ", year]

    journal = myDict["journal"]
    if journal is not None:
        parts.extend([",\n\\newblock ", journal])

    parts.extend([";\n\\newblock arXiv:", entryID, "."])

    doiList = myDict["DOI"]
    if doiList is not None and len(doiList) > 0:
        parts.extend(["\n\\newblock DOI: ", " ".join(doiList), "."])

    parts.append("\n")
    return "".join(parts)
|
466 |
+
|
467 |
+
|
468 |
+
|
469 |
+
|
470 |
+
|
471 |
+
def errorMarkup(errorText):
    """
    Build the HTML shown when a lookup produced no results.

    Input: errorText - string inserted as-is into the first paragraph.
    Output: string with a "No results" heading, the error text and a
            paragraph pointing to the issue tracker.
    """
    heading = '<h2 class="error">No results</h2>\n'
    message = "<p>" + errorText + "</p>\n"
    apology = '<p>If you think you entered a valid arXiv ID and you keep getting this error message, please accept our apologies and <a href="https://github.com/ssp/arXivToWiki/issues">let me know</a>.</p>\n'
    return heading + message + apology
|
479 |
+
|
480 |
+
|
481 |
+
|
482 |
+
def isRunningFromBibTeXURI():
    """
    Tell whether the request URI or host name contains "bibtex".
    """
    marker = "bibtex"
    return isInRequestURI(marker)
|
484 |
+
|
485 |
+
def isRunningFromHTMLURI():
    """
    Tell whether the request URI or host name contains "html".
    """
    marker = "html"
    return isInRequestURI(marker)
|
487 |
+
|
488 |
+
def isInRequestURI(string):
    """
    Tell whether string occurs in the REQUEST_URI or HTTP_HOST environment variable.

    REQUEST_URI is checked first; HTTP_HOST is only looked at if that
    check fails.
    """
    for fieldName in ("REQUEST_URI", "HTTP_HOST"):
        if isInEnvironment(fieldName, string):
            return True
    return False
|
490 |
+
|
491 |
+
def isInEnvironment(fieldName, string):
    """
    Tell whether string occurs in the environment variable fieldName.

    The variable's value is lowercased before the search, string is used
    as given. Returns False if the variable is not set.
    """
    value = os.environ.get(fieldName)
    if value is None:
        return False
    return string in value.lower()
|
496 |
+
|
497 |
+
|
498 |
+
# Captures the digits before and after the dot of an ID, skipping any
# leading non-digits; used to turn an ID into a plain number.
IDCleanerRE = re.compile(r"[^0-9]*([0-9]*)\.?([0-9]*)")


def comparePaperDictionaries(firstPaper, secondPaper):
    """
    Comparison function for sorting paper dictionaries.

    Earlier years are smaller.
    Smaller IDs within a year are smaller.

    Output: -1, 0 or 1. Papers compare as equal (0) if either of them
            lacks the "year" or "ID" key.
    """
    def threeWay(left, right):
        # -1, 0 or 1 depending on how left compares to right.
        return (left > right) - (left < right)

    def numericID(paper):
        # The ID with everything but its digits removed, as a number.
        return int(IDCleanerRE.sub(r"\1\2", paper["ID"]))

    for paper in (firstPaper, secondPaper):
        if "year" not in paper or "ID" not in paper:
            return 0

    byYear = threeWay(firstPaper["year"], secondPaper["year"])
    if byYear != 0:
        return byYear

    return threeWay(numericID(firstPaper), numericID(secondPaper))
|
516 |
+
|
517 |
+
|
518 |
+
|
519 |
+
def processCgi(form):
    """
    Handle a single request: read the form, query the arXiv and print the response.

    Input: form - object holding the submitted fields. It is accessed with
                  has_key(name) and form[name].value; the fields "q",
                  "outputformat" and "format" are read.

    The query is either a list of paper IDs (separated by commas or
    whitespace), a single arXiv URL, or a single arXiv author ID / ORCID ID.
    The page head is always printed. For outputformat "html" the results
    are printed as an HTML page, for "raw" only the bare markup in the
    requested format is printed.

    Changes compared to the previous version:
    - IDs which failed validation are escaped before being echoed into the page.
    - Download and XML parsing errors lead to the "could not be retrieved"
      message instead of an unhandled exception; the connection is closed.
    - The version suffix is stripped from the ID with a regex rather than
      by cutting at the first "v", which truncated IDs like solv-int/9901001.
    - The "too many IDs" warning is only shown when IDs were actually ignored.
    """
    atomNS = "{http://www.w3.org/2005/Atom}"
    arXivNS = "{http://arxiv.org/schemas/atom}"

    queryString = ""
    papers = []
    personID = ""
    if form.has_key("q"):
        queryString = form["q"].value
        # Commas and whitespace both separate IDs; the set removes duplicates.
        papers = list(set(re.sub(r",", r" ", queryString).split()))
        # For a single entry matching a regex we have an arXiv or ORCID author ID,
        # see https://arxiv.org/help/author_identifiers
        if len(papers) == 1:
            arxivAuthorIDRegex = r"([a-z]*_[a-z]_[0-9]*)"
            orcidIDRegex = r"((https://orcid.org/)?\d\d\d\d-\d\d\d\d-\d\d\d\d-\d\d\d[0-9X])"
            authorMatch = re.search(arxivAuthorIDRegex + "|" + orcidIDRegex, papers[0])
            if authorMatch is not None:
                personID = authorMatch.group(0)
            # A single arXiv URL: use the paper ID found in its path.
            urlParts = urlparse(queryString)
            if urlParts.netloc == "arxiv.org":
                fromUriPath = extractPapersFromArXivUriPath(urlParts.path)
                if fromUriPath is not None:
                    papers = [fromUriPath]

    # Only known values are accepted for the two format fields.
    outputformat = "html"
    if form.has_key("outputformat"):
        of = form["outputformat"].value
        if of in ["html", "raw"]:
            outputformat = of

    format = "wiki"
    if isRunningFromBibTeXURI():
        format = "bibtex"
    elif isRunningFromHTMLURI():
        format = "html"
    if form.has_key("format"):
        f = form["format"].value
        if f in ["wiki", "bibtex", "biblatex", "bibitem", "html"]:
            format = f

    printAll(pageHead(queryString, format, outputformat))

    if form.has_key("q"):
        failedIDs = []
        if personID == "":
            arXivIDs = []
            for paperID in papers:
                processedID = prepareArXivID(paperID)
                if processedID is not None:
                    arXivIDs += [processedID]
                else:
                    failedIDs += [paperID]
            arXivURL = "https://export.arxiv.org/api/query?id_list=" + ",".join(arXivIDs) + "&max_results=" + str(maxpapers)
        else:
            arXivURL = "https://arxiv.org/a/" + personID + ".atom"

        # Download and parse the feed. Any failure to do so is reported to
        # the user below instead of aborting the script.
        feed = None
        try:
            download = urllib.urlopen(arXivURL)
            try:
                feed = xml.etree.ElementTree.fromstring(download.read())
            finally:
                download.close()
        except (IOError, xml.etree.ElementTree.ParseError):
            feed = None

        if feed is None:
            printHtml(extraInfo(), outputformat)
            printHtml(errorMarkup("The arXiv data could not be retrieved."), outputformat)
        else:
            publications = []
            output = []

            # Check for an error by looking at the title of the first paper:
            # errors are marked by 'Error', empty feeds don't have a title.
            firstTitle = feed.find(atomNS + "entry/" + atomNS + "title")
            if firstTitle is None or firstTitle.text == "Error":
                lookupSubject = "paper ID"
                if personID == "" and len(papers) > 1:
                    lookupSubject = "paper IDs"
                elif personID != "":
                    lookupSubject = "author ID"

                printHtml(extraInfo(), outputformat)
                printHtml(errorMarkup("The arXiv did not return any results for the " + lookupSubject + " you entered. Any chance there may be a typo in there?"), outputformat)
            else:
                # We got data and no error: Process it.
                for paper in feed.iter(atomNS + "entry"):
                    titleElement = paper.find(atomNS + "title")
                    if titleElement is None:
                        continue
                    # Titles may be wrapped over several lines in the feed.
                    theTitle = re.sub(r"\s*\n\s*", r" ", titleElement.text)

                    theAuthors = []
                    for author in paper.iter(atomNS + "author"):
                        theAuthors += [author.find(atomNS + "name").text]

                    theAbstract = paper.find(atomNS + "summary").text.strip()

                    thePDF = ""
                    theLink = ""
                    for link in paper.iter(atomNS + "link"):
                        attributes = link.attrib
                        if "href" in attributes:
                            linktarget = attributes["href"]
                            linktype = attributes["type"] if "type" in attributes else None
                            if linktype == "application/pdf":
                                thePDF = linktarget
                            elif linktype == "text/html":
                                theLink = linktarget

                    # The ID is the part of the link after /abs/ without the
                    # version suffix; the link is rebuilt without the version.
                    splitLink = theLink.split("/abs/")
                    theID = re.sub(r"v\d+$", "", splitLink[-1])
                    theLink = splitLink[0] + "/abs/" + theID

                    theYear = paper.find(atomNS + "published").text.split('-')[0]

                    theDOIs = []
                    for DOI in paper.iter(arXivNS + "doi"):
                        theDOIs += [DOI.text]

                    journal = paper.find(arXivNS + "journal_ref")
                    theJournal = None
                    if journal is not None:
                        theJournal = journal.text

                    publicationDict = dict({
                        "ID": theID,
                        "authors": theAuthors,
                        "title": theTitle,
                        "abstract": theAbstract,
                        "year": theYear,
                        "PDF": thePDF,
                        "link": theLink,
                        "DOI": theDOIs,
                        "journal": theJournal})
                    publications += [publicationDict]

                # Sort in reverse order: comparePaperDictionaries is the
                # comparison function, the final True requests the reversal.
                publications.sort(comparePaperDictionaries, None, True)

                # Papers with a journal reference count as published.
                preprints = []
                published = []
                for publication in publications:
                    if publication["journal"] is not None:
                        published += [publication]
                    else:
                        preprints += [publication]

                output += ["<div class='formatpicker'>Format:<ul class='outputtypes'>\n",
                    """<li><a href='javascript:showType("bibtex");' id='bibtex-link'>BibTeX</a></li>\n""",
                    """<li><a href='javascript:showType("biblatex");' id='biblatex-link'>BibLaTeX</a></li>\n""",
                    """<li><a href='javascript:showType("bibitem");' id='bibitem-link'>\\bibitem</a></li>\n""",
                    """<li><a href='javascript:showType("html");' id='html-link'>HTML</a></li>\n""",
                    """<li><a href='javascript:showType("wiki");' id='wiki-link'>Wiki</a></li>\n""",
                    "</ul>\n</div>\n"]

                # Only warn when IDs were really dropped; with exactly
                # maxpapers IDs the warning used to claim that 0 were ignored.
                if len(papers) > maxpapers:
                    output += ["<div class='warning'>We can only process " + str(maxpapers) + " paper IDs at a time. " + str(len(papers) - maxpapers) + " of the IDs you entered were ignored.</div>"]

                journalrefnote = """<p><em>Please <a class="editlink" href="https://arxiv.org/user/" title="Go to arXiv user page where you can edit the information stored for your papers.">add the journal reference and <abbr title="Document Object Identifier">DOI</abbr> for your papers as soon as they are published</a>.</em></p>"""

                # One div per format; the page's JavaScript shows one of them.
                output += ["<div id='bibtex'>\n"]
                if len(preprints) > 0:
                    output += ["<h2>Preprints:</h2>\n", journalrefnote]
                    output += bibTeXMarkup(preprints, "bibtex")
                if len(published) > 0:
                    output += ["<h2>Published:</h2>\n"]
                    output += ["""<p>These BibTeX records are based on arXiv information only. You may prefer getting the more detailed records provided by <a href="https://mathscinet.ams.org/mathscinet/">MathSciNet</a> instead.</p>\n"""]
                    output += bibTeXMarkup(published, "bibtex")
                output += ["</div>\n"]

                output += ["<div id='biblatex'>\n"]
                if len(preprints) > 0:
                    output += ["<h2>Preprints:</h2>\n", journalrefnote]
                    output += bibTeXMarkup(preprints, "biblatex")
                if len(published) > 0:
                    output += ["<h2>Published:</h2>\n"]
                    output += ["""<p>These BibLaTeX records are based on arXiv information only. You may prefer getting the more detailed records provided by <a href="https://mathscinet.ams.org/mathscinet/">MathSciNet</a> instead.</p>\n"""]
                    output += bibTeXMarkup(published, "biblatex")
                output += ["</div>\n"]

                output += ["<div id='bibitem'>\n"]
                if len(preprints) > 0:
                    output += ["<h2>Preprints:</h2>\n", journalrefnote]
                    output += bibItemMarkup(preprints)
                if len(published) > 0:
                    output += ["<h2>Published:</h2>\n"]
                    output += bibItemMarkup(published)
                output += ["</div>\n"]

                output += ["<div id='html'>\n"]
                if len(preprints) > 0:
                    output += ["<h2>Preprints:</h2>\n", journalrefnote]
                    output += htmlMarkup(preprints, "Preprint")
                if len(published) > 0:
                    output += ["<h2>Published:</h2>\n"]
                    output += htmlMarkup(published, "Published")
                output += ["</div>\n"]

                output += ["<div id='wiki'>\n"]
                if len(preprints) > 0:
                    output += ["<h2>Preprints:</h2>\n", journalrefnote]
                    output += wikiMarkup(preprints, "Preprint")
                if len(published) > 0:
                    output += ["<h2>Published:</h2>\n"]
                    output += wikiMarkup(published, "Published")
                output += ["</div>\n"]

            # The failed IDs are text typed in by the user and must be
            # escaped before being put into the page.
            if len(failedIDs) > 0:
                if len(failedIDs) == 1:
                    printHtml("""<div class="warning">No paper with the ID “""" + escapeHTML(failedIDs[0]) + """” could be found on the arXiv.</div>\n""", outputformat)
                else:
                    printHtml("""<div class="warning">The following paper IDs could not be found on the arXiv: """ + ", ".join([escapeHTML(failedID) for failedID in failedIDs]) + """.</div>\n""", outputformat)

            printHtml("".join(output), outputformat)
            printPublicationsRaw(publications, format, outputformat)
    else:
        printHtml(extraInfo(), outputformat)

    printHtml(pageFoot(), outputformat)
|
740 |
+
|
741 |
+
|
742 |
+
"""
|
743 |
+
MAIN SCRIPT *****************************************************************
|
744 |
+
"""
|
745 |
+
# form = cgi.FieldStorage()
|
746 |
+
# processCgi(form)
|