File size: 9,997 Bytes
cf2a15a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorBoard HTTP utilities."""


import gzip
import io
import json
import re
import struct
import time
import wsgiref.handlers

import werkzeug

from tensorboard.backend import json_util

# Matches any single whitespace character.
# NOTE(review): not referenced in the visible part of this file; presumably
# used to reject malformed domains before they reach the CSP lists below --
# confirm against callers.
_DISALLOWED_CHAR_IN_DOMAIN = re.compile(r"\s")

# TODO(stephanwlee): Refactor this to not use module variables but instead
# make it configurable via some kind of assets provider which would hold
# the configuration for the CSP.

# The `_CSP_*` values below are read by `Respond` when it builds the
# Content-Security-Policy header for text/html responses. Each
# `*_DOMAINS_WHITELIST` list supplies extra sources for the directive it is
# named after (font-src, frame-src, img-src, script-src, connect-src,
# style-src).

# @vaadin/vaadin-lumo-styles/font-icons(via vaadin-grid) uses data URI for
# loading font icons.
_CSP_FONT_DOMAINS_WHITELIST = ["data:"]
_CSP_FRAME_DOMAINS_WHITELIST = []
_CSP_IMG_DOMAINS_WHITELIST = []
_CSP_SCRIPT_DOMAINS_WHITELIST = []
_CSP_CONNECT_DOMAINS_WHITELIST = []
# When true, 'self' is included in script-src.
_CSP_SCRIPT_SELF = True
# When true, 'unsafe-eval' is included in script-src.
# numericjs (via projector) uses unsafe-eval :(.
_CSP_SCRIPT_UNSAFE_EVAL = True
_CSP_STYLE_DOMAINS_WHITELIST = []

# Matches the leading run of characters up to the first ";" or whitespace,
# i.e. the bare media type at the start of a Content-Type value.
_EXTRACT_MIMETYPE_PATTERN = re.compile(r"^[^;\s]*")
# Captures the value of a `charset=` parameter (letters, digits, "-", "_").
_EXTRACT_CHARSET_PATTERN = re.compile(r"charset=([-_0-9A-Za-z]+)")

# Decides whether an Accept-Encoding header value permits gzip: matches the
# `gzip`, `x-gzip` or `*` codings, with or without parameters, unless the
# coding carries an explicit zero weight (`;q=0`, `;q=0.0`, ...). A non-zero
# weight such as `gzip;q=0.5` or `gzip;q=1.0` still means "acceptable".
# https://tools.ietf.org/html/rfc7231#section-5.3.4
_ALLOWS_GZIP_PATTERN = re.compile(
    # The coding token must start the header or follow a comma/whitespace.
    r"(?:^|,|\s)(?:(?:x-)?gzip|\*)"
    # Reject an explicit zero weight, tolerating optional whitespace around
    # the ";" and any number of trailing zero decimals.
    r"(?!\s*;\s*q=0(?:\.0*)?\s*(?:,|$))"
    # Skip any remaining parameters (e.g. ";q=0.8") of this list item.
    r"(?:\s*;[^,]*)?"
    # The item must then end at whitespace, a comma, or end of header.
    r"(?:\s|,|$)"
)

# Media types that `Respond` treats as text: eligible for automatic gzip
# compression and, when not JSON, for an appended `charset` parameter.
_TEXTUAL_MIMETYPES = {
    "application/javascript",
    "application/json",
    "application/json+protobuf",
    "image/svg+xml",
    "text/css",
    "text/csv",
    "text/html",
    "text/javascript",
    "text/plain",
    "text/tab-separated-values",
    "text/x-protobuf",
}

# Media types for which `Respond` serializes container content to JSON.
_JSON_MIMETYPES = {
    "application/json",
    "application/json+protobuf",
}

# The only HTML media type recognized; xhtml is not supported for now.
_HTML_MIMETYPE = "text/html"


def Respond(
    request,
    content,
    content_type,
    code=200,
    expires=0,
    content_encoding=None,
    encoding="utf-8",
    csp_scripts_sha256s=None,
    headers=None,
):
    """Build a werkzeug Response for the given payload.

    Compression: the payload is gzipped on the fly when the client accepts
    gzip, the media type is textual, and `content_encoding` does not already
    mark the payload as encoded. Conversely, a payload that is already
    gzip-encoded is gunzipped as a stream when the client does not accept
    gzip.

    Caching: with the default `expires=0`, browser and proxy caching are both
    disabled. A positive `expires` lets the browser cache the response for
    that many seconds, while proxies remain forbidden from caching so that
    developers can bypass the cache with Ctrl+Shift+R.

    Charset: for textual, non-JSON content without a `charset` parameter in
    `content_type`, `encoding` is used as the transmission charset and is
    appended to the Content-Type header. When `content_type` does carry a
    `charset` parameter that differs from `encoding`, the content is
    transcoded to the declared charset.

    JSON: when `content_type` names a JSON media type, `content` may be a
    dict, list, tuple, or set; it is then passed through `json_util.Cleanse`
    and `json.dumps`. `encoding` is used to decode byte strings inside the
    object, so binary data cannot be carried within JSON. The output is
    ASCII with escape sequences unless `content_type` declares a charset, in
    which case the serialized JSON is encoded with that charset instead.

    Args:
      request: A werkzeug Request object. Used mostly to check the
        Accept-Encoding header.
      content: Payload data as byte string, unicode string, or maybe JSON.
      content_type: Media type and optionally an output charset.
      code: Numeric HTTP status code to use.
      expires: Second duration for browser caching.
      content_encoding: Encoding if content is already encoded, e.g. 'gzip'.
      encoding: Input charset if content parameter has byte strings.
      csp_scripts_sha256s: List of base64 serialized sha256 of whitelisted
        script elements for script-src of the Content-Security-Policy. If it
        is None, no script hashes are added to the policy. It is only used
        when the content_type is text/html.
      headers: Any additional headers to include on the response, as a
        list of key-value tuples: e.g., `[("Allow", "GET")]`. In case of
        conflict, these may be overridden with headers added by this
        function.

    Returns:
      A werkzeug Response object (a WSGI application).
    """
    # Split the Content-Type value into its media type and optional charset.
    mimetype = _EXTRACT_MIMETYPE_PATTERN.search(content_type).group(0)
    declared_charset = _EXTRACT_CHARSET_PATTERN.search(content_type)
    has_declared_charset = declared_charset is not None
    out_charset = (
        declared_charset.group(1) if has_declared_charset else encoding
    )
    is_json = mimetype in _JSON_MIMETYPES
    is_textual = has_declared_charset or mimetype in _TEXTUAL_MIMETYPES

    # Serialize JSON containers; escape non-ASCII unless a charset was given.
    if is_json and isinstance(content, (dict, list, set, tuple)):
        cleansed = json_util.Cleanse(content, encoding)
        content = json.dumps(cleansed, ensure_ascii=not has_declared_charset)

    # Bring the payload to bytes in the output charset, transcoding byte
    # strings only when input and output charsets differ.
    if isinstance(content, bytes) and out_charset != encoding:
        content = content.decode(encoding)
    if isinstance(content, str):
        content = content.encode(out_charset)

    # Advertise the charset for textual, non-JSON types that did not set one.
    if is_textual and not (has_declared_charset or is_json):
        content_type = content_type + "; charset=" + out_charset

    accept_encoding = request.headers.get("Accept-Encoding", "")
    gzip_accepted = _ALLOWS_GZIP_PATTERN.search(accept_encoding) is not None

    # Compress plain textual payloads for clients that take gzip.
    if gzip_accepted and is_textual and not content_encoding:
        compressed = io.BytesIO()
        # mtime=0 keeps the compressed bytes deterministic for a given input.
        with gzip.GzipFile(
            fileobj=compressed, mode="wb", compresslevel=3, mtime=0
        ) as gz:
            gz.write(content)
        content = compressed.getvalue()
        content_encoding = "gzip"

    content_length = len(content)
    # Precompressed payload but the client cannot take gzip: decompress it
    # lazily while streaming instead of inflating it in memory.
    stream_gunzip = content_encoding == "gzip" and not gzip_accepted
    if stream_gunzip:
        decompressor = gzip.GzipFile(fileobj=io.BytesIO(content), mode="rb")
        # The gzip trailer's last 4 bytes (little-endian) hold the original
        # size mod 2^32; it is taken as the content length, so precompressed
        # payloads over 4 GB are not supported here.
        (content_length,) = struct.unpack("<I", content[-4:])
        content = werkzeug.wsgi.wrap_file(request.environ, decompressor)
        content_encoding = None

    response_headers = list(headers) if headers else []
    response_headers.extend(
        [
            ("Content-Length", str(content_length)),
            ("X-Content-Type-Options", "nosniff"),
        ]
    )
    if content_encoding:
        response_headers.append(("Content-Encoding", content_encoding))

    if expires > 0:
        expiry_date = wsgiref.handlers.format_date_time(
            time.time() + float(expires)
        )
        cache_headers = [
            ("Expires", expiry_date),
            ("Cache-Control", "private, max-age=%d" % expires),
        ]
    else:
        cache_headers = [
            ("Expires", "0"),
            ("Cache-Control", "no-cache, must-revalidate"),
        ]
    response_headers.extend(cache_headers)

    if mimetype == _HTML_MIMETYPE:
        # Empty fragments are dropped by _create_csp_string.
        script_fragments = _CSP_SCRIPT_DOMAINS_WHITELIST + [
            "'self'" if _CSP_SCRIPT_SELF else "",
            "'unsafe-eval'" if _CSP_SCRIPT_UNSAFE_EVAL else "",
        ]
        script_fragments.extend(
            "'sha256-{}'".format(digest)
            for digest in (csp_scripts_sha256s or [])
        )
        script_srcs = _create_csp_string(*script_fragments)

        font_srcs = _create_csp_string("'self'", *_CSP_FONT_DOMAINS_WHITELIST)
        # Dynamic plugins are rendered inside an iframe.
        frame_srcs = _create_csp_string(
            "'self'", *_CSP_FRAME_DOMAINS_WHITELIST
        )
        img_srcs = _create_csp_string(
            "'self'",
            # used by favicon
            "data:",
            # used by What-If tool for image sprites.
            "blob:",
            *_CSP_IMG_DOMAINS_WHITELIST,
        )
        style_srcs = _create_csp_string(
            "'self'",
            # used by google-chart
            "https://www.gstatic.com",
            "data:",
            # inline styles: Polymer templates + d3 uses inline styles.
            "'unsafe-inline'",
            *_CSP_STYLE_DOMAINS_WHITELIST,
        )
        connect_srcs = _create_csp_string(
            "'self'", *_CSP_CONNECT_DOMAINS_WHITELIST
        )

        policy = ";".join(
            [
                "default-src 'self'",
                "font-src %s" % font_srcs,
                "frame-src %s" % frame_srcs,
                "img-src %s" % img_srcs,
                "object-src 'none'",
                "style-src %s" % style_srcs,
                "connect-src %s" % connect_srcs,
                "script-src %s" % script_srcs,
            ]
        )
        response_headers.append(("Content-Security-Policy", policy))

    # HEAD responses keep every header (including Content-Length) but carry
    # no body.
    if request.method == "HEAD":
        content = None

    return werkzeug.wrappers.Response(
        response=content,
        status=code,
        headers=response_headers,
        content_type=content_type,
        direct_passthrough=stream_gunzip,
    )


def _create_csp_string(*csp_fragments):
    """Join CSP source fragments into a single directive value.

    Args:
      *csp_fragments: Source expressions such as "'self'" or "data:". Falsy
        entries (e.g. empty strings) are skipped.

    Returns:
      The remaining fragments separated by single spaces, or the literal
      "'none'" when no fragment remains.
    """
    joined = " ".join(filter(None, csp_fragments))
    return joined or "'none'"