Spaces:
Sleeping
Sleeping
File size: 8,885 Bytes
7d134e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 |
from __future__ import annotations
import io
import os
import typing
from pathlib import Path
from ._types import (
AsyncByteStream,
FileContent,
FileTypes,
RequestData,
RequestFiles,
SyncByteStream,
)
from ._utils import (
format_form_param,
guess_content_type,
peek_filelike_length,
primitive_value_to_str,
to_bytes,
)
def get_multipart_boundary_from_content_type(
content_type: bytes | None,
) -> bytes | None:
if not content_type or not content_type.startswith(b"multipart/form-data"):
return None
# parse boundary according to
# https://www.rfc-editor.org/rfc/rfc2046#section-5.1.1
if b";" in content_type:
for section in content_type.split(b";"):
if section.strip().lower().startswith(b"boundary="):
return section.strip()[len(b"boundary=") :].strip(b'"')
return None
class DataField:
"""
A single form field item, within a multipart form field.
"""
def __init__(self, name: str, value: str | bytes | int | float | None) -> None:
if not isinstance(name, str):
raise TypeError(
f"Invalid type for name. Expected str, got {type(name)}: {name!r}"
)
if value is not None and not isinstance(value, (str, bytes, int, float)):
raise TypeError(
"Invalid type for value. Expected primitive type,"
f" got {type(value)}: {value!r}"
)
self.name = name
self.value: str | bytes = (
value if isinstance(value, bytes) else primitive_value_to_str(value)
)
def render_headers(self) -> bytes:
if not hasattr(self, "_headers"):
name = format_form_param("name", self.name)
self._headers = b"".join(
[b"Content-Disposition: form-data; ", name, b"\r\n\r\n"]
)
return self._headers
def render_data(self) -> bytes:
if not hasattr(self, "_data"):
self._data = to_bytes(self.value)
return self._data
def get_length(self) -> int:
headers = self.render_headers()
data = self.render_data()
return len(headers) + len(data)
def render(self) -> typing.Iterator[bytes]:
yield self.render_headers()
yield self.render_data()
class FileField:
"""
A single file field item, within a multipart form field.
"""
CHUNK_SIZE = 64 * 1024
def __init__(self, name: str, value: FileTypes) -> None:
self.name = name
fileobj: FileContent
headers: dict[str, str] = {}
content_type: str | None = None
# This large tuple based API largely mirror's requests' API
# It would be good to think of better APIs for this that we could
# include in httpx 2.0 since variable length tuples(especially of 4 elements)
# are quite unwieldly
if isinstance(value, tuple):
if len(value) == 2:
# neither the 3rd parameter (content_type) nor the 4th (headers)
# was included
filename, fileobj = value
elif len(value) == 3:
filename, fileobj, content_type = value
else:
# all 4 parameters included
filename, fileobj, content_type, headers = value # type: ignore
else:
filename = Path(str(getattr(value, "name", "upload"))).name
fileobj = value
if content_type is None:
content_type = guess_content_type(filename)
has_content_type_header = any("content-type" in key.lower() for key in headers)
if content_type is not None and not has_content_type_header:
# note that unlike requests, we ignore the content_type provided in the 3rd
# tuple element if it is also included in the headers requests does
# the opposite (it overwrites the headerwith the 3rd tuple element)
headers["Content-Type"] = content_type
if isinstance(fileobj, io.StringIO):
raise TypeError(
"Multipart file uploads require 'io.BytesIO', not 'io.StringIO'."
)
if isinstance(fileobj, io.TextIOBase):
raise TypeError(
"Multipart file uploads must be opened in binary mode, not text mode."
)
self.filename = filename
self.file = fileobj
self.headers = headers
def get_length(self) -> int | None:
headers = self.render_headers()
if isinstance(self.file, (str, bytes)):
return len(headers) + len(to_bytes(self.file))
file_length = peek_filelike_length(self.file)
# If we can't determine the filesize without reading it into memory,
# then return `None` here, to indicate an unknown file length.
if file_length is None:
return None
return len(headers) + file_length
def render_headers(self) -> bytes:
if not hasattr(self, "_headers"):
parts = [
b"Content-Disposition: form-data; ",
format_form_param("name", self.name),
]
if self.filename:
filename = format_form_param("filename", self.filename)
parts.extend([b"; ", filename])
for header_name, header_value in self.headers.items():
key, val = f"\r\n{header_name}: ".encode(), header_value.encode()
parts.extend([key, val])
parts.append(b"\r\n\r\n")
self._headers = b"".join(parts)
return self._headers
def render_data(self) -> typing.Iterator[bytes]:
if isinstance(self.file, (str, bytes)):
yield to_bytes(self.file)
return
if hasattr(self.file, "seek"):
try:
self.file.seek(0)
except io.UnsupportedOperation:
pass
chunk = self.file.read(self.CHUNK_SIZE)
while chunk:
yield to_bytes(chunk)
chunk = self.file.read(self.CHUNK_SIZE)
def render(self) -> typing.Iterator[bytes]:
yield self.render_headers()
yield from self.render_data()
class MultipartStream(SyncByteStream, AsyncByteStream):
"""
Request content as streaming multipart encoded form data.
"""
def __init__(
self,
data: RequestData,
files: RequestFiles,
boundary: bytes | None = None,
) -> None:
if boundary is None:
boundary = os.urandom(16).hex().encode("ascii")
self.boundary = boundary
self.content_type = "multipart/form-data; boundary=%s" % boundary.decode(
"ascii"
)
self.fields = list(self._iter_fields(data, files))
def _iter_fields(
self, data: RequestData, files: RequestFiles
) -> typing.Iterator[FileField | DataField]:
for name, value in data.items():
if isinstance(value, (tuple, list)):
for item in value:
yield DataField(name=name, value=item)
else:
yield DataField(name=name, value=value)
file_items = files.items() if isinstance(files, typing.Mapping) else files
for name, value in file_items:
yield FileField(name=name, value=value)
def iter_chunks(self) -> typing.Iterator[bytes]:
for field in self.fields:
yield b"--%s\r\n" % self.boundary
yield from field.render()
yield b"\r\n"
yield b"--%s--\r\n" % self.boundary
def get_content_length(self) -> int | None:
"""
Return the length of the multipart encoded content, or `None` if
any of the files have a length that cannot be determined upfront.
"""
boundary_length = len(self.boundary)
length = 0
for field in self.fields:
field_length = field.get_length()
if field_length is None:
return None
length += 2 + boundary_length + 2 # b"--{boundary}\r\n"
length += field_length
length += 2 # b"\r\n"
length += 2 + boundary_length + 4 # b"--{boundary}--\r\n"
return length
# Content stream interface.
def get_headers(self) -> dict[str, str]:
content_length = self.get_content_length()
content_type = self.content_type
if content_length is None:
return {"Transfer-Encoding": "chunked", "Content-Type": content_type}
return {"Content-Length": str(content_length), "Content-Type": content_type}
def __iter__(self) -> typing.Iterator[bytes]:
for chunk in self.iter_chunks():
yield chunk
async def __aiter__(self) -> typing.AsyncIterator[bytes]:
for chunk in self.iter_chunks():
yield chunk
|