Spaces:

yasirme
/

ChatBot-UI-With-API

Runtime error

ChatBot-UI-With-API / venv /lib /python3.11 /site-packages /werkzeug /sansio /multipart.py

scipher.alpha

Improved UI

47b2311 9 months ago

11.6 kB

	from __future__ import annotations

	import re
	import typing as t
	from dataclasses import dataclass
	from enum import auto
	from enum import Enum

	from ..datastructures import Headers
	from ..exceptions import RequestEntityTooLarge
	from ..http import parse_options_header


	class Event:
	    """Base class for every event handled by the multipart decoder/encoder."""


	@dataclass(frozen=True)
	class Preamble(Event):
	    """Event carrying the bytes that precede the first boundary."""

	    # Buffer content found before the first boundary match.
	    data: bytes


	@dataclass(frozen=True)
	class Field(Event):
	    """Event marking the start of a part whose Content-Disposition has no filename."""

	    # Value of the ``name`` option of the Content-Disposition header.
	    name: str
	    # All headers parsed for this part.
	    headers: Headers


	@dataclass(frozen=True)
	class File(Event):
	    """Event marking the start of a part whose Content-Disposition has a filename."""

	    # Value of the ``name`` option of the Content-Disposition header.
	    name: str
	    # Value of the ``filename`` option of the Content-Disposition header.
	    filename: str
	    # All headers parsed for this part.
	    headers: Headers


	@dataclass(frozen=True)
	class Data(Event):
	    """Event carrying a chunk of the body of the current part."""

	    # The body bytes of this chunk (may be empty).
	    data: bytes
	    # True while no boundary terminating the part has been found yet, i.e.
	    # further Data events for the same part will follow.
	    more_data: bool


	@dataclass(frozen=True)
	class Epilogue(Event):
	    """Event carrying whatever bytes follow the closing boundary."""

	    # Bytes left in the buffer after the closing boundary.
	    data: bytes


	class NeedData(Event):
	    """Event signalling that no other event can be produced from the buffered data."""


	# Module-level NeedData instance. MultipartDecoder.next_event() starts from
	# this value and returns it when the current state produced no other event
	# and the input has not been marked complete.
	NEED_DATA = NeedData()


	class State(Enum):
	    """States shared by the multipart decoder and encoder state machines."""

	    # Looking for the first boundary.
	    PREAMBLE = auto()
	    # Reading the headers of a part.
	    PART = auto()
	    # Reading follow-up chunks of a part body.
	    DATA = auto()
	    # About to read the first chunk of a part body.
	    DATA_START = auto()
	    # The closing boundary has been seen.
	    EPILOGUE = auto()
	    # Nothing more to process.
	    COMPLETE = auto()


	# Multipart line breaks MUST be CRLF (\r\n) by RFC-7578, except that
	# many implementations break this and either use CR or LF alone.
	# NOTE: the alternatives are separated by a plain ``|``; an escaped ``\|``
	# would match a literal pipe character instead of alternating.
	LINE_BREAK = b"(?:\r\n|\n|\r)"
	# A blank line (two identical consecutive line breaks) separates the
	# headers of a part from its body.
	BLANK_LINE_RE = re.compile(b"(?:\r\n\r\n|\r\r|\n\n)", re.MULTILINE)
	LINE_BREAK_RE = re.compile(LINE_BREAK, re.MULTILINE)
	# Header values can be continued via a space or tab after the linebreak, as
	# per RFC2231
	HEADER_CONTINUATION_RE = re.compile(b"%s[ \t]" % LINE_BREAK, re.MULTILINE)
	# This must be long enough to contain any line breaks plus any
	# additional boundary markers (--) such that they will be found in a
	# subsequent search
	SEARCH_EXTRA_LENGTH = 8


	class MultipartDecoder:
	    """Decodes a multipart message as bytes into Python events.

	    The part data is returned as available to allow the caller to save
	    the data from memory to disk, if desired.

	    Feed bytes with :meth:`receive_data` and pull events with
	    :meth:`next_event` until it returns ``NEED_DATA`` (more input is
	    required) or an ``Epilogue`` event has been produced.
	    """

	    def __init__(
	        self,
	        boundary: bytes,
	        max_form_memory_size: int | None = None,
	        *,
	        max_parts: int | None = None,
	    ) -> None:
	        """Create a decoder for messages delimited by ``boundary``.

	        :param boundary: The multipart boundary, without the leading ``--``.
	        :param max_form_memory_size: Upper bound on the number of bytes held
	            in the internal buffer; ``None`` disables the check.
	        :param max_parts: Upper bound on the number of parts that may be
	            decoded; ``None`` disables the check.
	        """
	        self.buffer = bytearray()
	        self.complete = False
	        self.max_form_memory_size = max_form_memory_size
	        self.max_parts = max_parts
	        self.state = State.PREAMBLE
	        self.boundary = boundary

	        # Note in the below \h i.e. horizontal whitespace is used
	        # as [^\S\n\r] as \h isn't supported in python.

	        # The preamble must end with a boundary where the boundary is
	        # prefixed by a line break, RFC2046. Except that many
	        # implementations including Werkzeug's tests omit the line
	        # break prefix. In addition the first boundary could be the
	        # epilogue boundary (for empty form-data) hence the matching
	        # group to understand if it is an epilogue boundary.
	        self.preamble_re = re.compile(
	            rb"%s?--%s(--[^\S\n\r]*%s?|[^\S\n\r]*%s)"
	            % (LINE_BREAK, re.escape(boundary), LINE_BREAK, LINE_BREAK),
	            re.MULTILINE,
	        )
	        # A boundary must include a line break prefix and suffix, and
	        # may include trailing whitespace. In addition the boundary
	        # could be the epilogue boundary hence the matching group to
	        # understand if it is an epilogue boundary.
	        self.boundary_re = re.compile(
	            rb"%s--%s(--[^\S\n\r]*%s?|[^\S\n\r]*%s)"
	            % (LINE_BREAK, re.escape(boundary), LINE_BREAK, LINE_BREAK),
	            re.MULTILINE,
	        )
	        self._search_position = 0
	        self._parts_decoded = 0

	    def last_newline(self, data: bytes) -> int:
	        """Return the smaller of the last ``\\n`` index and the last ``\\r`` index.

	        An index defaults to ``len(data)`` when the corresponding character
	        does not occur in ``data``.
	        """
	        try:
	            last_nl = data.rindex(b"\n")
	        except ValueError:
	            last_nl = len(data)
	        try:
	            last_cr = data.rindex(b"\r")
	        except ValueError:
	            last_cr = len(data)

	        return min(last_nl, last_cr)

	    def receive_data(self, data: bytes | None) -> None:
	        """Append ``data`` to the buffer, or mark the input complete.

	        :param data: Bytes to buffer, or ``None`` to signal that no more
	            data will arrive.
	        :raises RequestEntityTooLarge: If buffering ``data`` would make the
	            buffer exceed ``max_form_memory_size``.
	        """
	        if data is None:
	            self.complete = True
	        elif (
	            self.max_form_memory_size is not None
	            and len(self.buffer) + len(data) > self.max_form_memory_size
	        ):
	            # Ensure that data within single event does not exceed limit.
	            # Also checked across accumulated events in MultiPartParser.
	            raise RequestEntityTooLarge()
	        else:
	            self.buffer.extend(data)

	    def next_event(self) -> Event:
	        """Advance the state machine and return the next event.

	        :returns: ``NEED_DATA`` when the buffered bytes are not sufficient
	            to produce an event, otherwise a ``Preamble``, ``Field``,
	            ``File``, ``Data`` or ``Epilogue`` event.
	        :raises ValueError: If a part has no Content-Disposition header, or
	            if the input was marked complete but no event can be produced.
	        :raises RequestEntityTooLarge: If more than ``max_parts`` parts have
	            been decoded.
	        """
	        event: Event = NEED_DATA

	        if self.state == State.PREAMBLE:
	            match = self.preamble_re.search(self.buffer, self._search_position)
	            if match is not None:
	                if match.group(1).startswith(b"--"):
	                    self.state = State.EPILOGUE
	                else:
	                    self.state = State.PART
	                data = bytes(self.buffer[: match.start()])
	                del self.buffer[: match.end()]
	                event = Preamble(data=data)
	                self._search_position = 0
	            else:
	                # Update the search start position to be equal to the
	                # current buffer length (already searched) minus a
	                # safe buffer for part of the search target.
	                self._search_position = max(
	                    0, len(self.buffer) - len(self.boundary) - SEARCH_EXTRA_LENGTH
	                )

	        elif self.state == State.PART:
	            match = BLANK_LINE_RE.search(self.buffer, self._search_position)
	            if match is not None:
	                headers = self._parse_headers(self.buffer[: match.start()])
	                # The final header ends with a single CRLF, however a
	                # blank line indicates the start of the
	                # body. Therefore the end is after the first CRLF.
	                headers_end = (match.start() + match.end()) // 2
	                del self.buffer[:headers_end]

	                if "content-disposition" not in headers:
	                    raise ValueError("Missing Content-Disposition header")

	                disposition, extra = parse_options_header(
	                    headers["content-disposition"]
	                )
	                name = t.cast(str, extra.get("name"))
	                filename = extra.get("filename")
	                if filename is not None:
	                    event = File(
	                        filename=filename,
	                        headers=headers,
	                        name=name,
	                    )
	                else:
	                    event = Field(
	                        headers=headers,
	                        name=name,
	                    )
	                self.state = State.DATA_START
	                self._search_position = 0
	                self._parts_decoded += 1

	                if self.max_parts is not None and self._parts_decoded > self.max_parts:
	                    raise RequestEntityTooLarge()
	            else:
	                # Update the search start position to be equal to the
	                # current buffer length (already searched) minus a
	                # safe buffer for part of the search target.
	                self._search_position = max(0, len(self.buffer) - SEARCH_EXTRA_LENGTH)

	        elif self.state == State.DATA_START:
	            data, del_index, more_data = self._parse_data(self.buffer, start=True)
	            del self.buffer[:del_index]
	            # The first chunk of a part is always reported, even when empty.
	            event = Data(data=data, more_data=more_data)
	            if more_data:
	                self.state = State.DATA

	        elif self.state == State.DATA:
	            data, del_index, more_data = self._parse_data(self.buffer, start=False)
	            del self.buffer[:del_index]
	            # Follow-up chunks are only reported when they carry data or
	            # terminate the part.
	            if data or not more_data:
	                event = Data(data=data, more_data=more_data)

	        elif self.state == State.EPILOGUE and self.complete:
	            event = Epilogue(data=bytes(self.buffer))
	            del self.buffer[:]
	            self.state = State.COMPLETE

	        if self.complete and isinstance(event, NeedData):
	            raise ValueError(f"Invalid form-data cannot parse beyond {self.state}")

	        return event

	    def _parse_headers(self, data: bytes) -> Headers:
	        """Parse the raw header block of a part into a ``Headers`` object."""
	        headers: list[tuple[str, str]] = []
	        # Merge the continued headers into one line
	        data = HEADER_CONTINUATION_RE.sub(b" ", data)
	        # Now there is one header per line
	        for line in data.splitlines():
	            line = line.strip()

	            if line != b"":
	                name, _, value = line.decode().partition(":")
	                headers.append((name.strip(), value.strip()))
	        return Headers(headers)

	    def _parse_data(self, data: bytes, *, start: bool) -> tuple[bytes, int, bool]:
	        """Extract the next chunk of part body from ``data``.

	        :param data: The bytes to scan (the decoder passes its buffer).
	        :param start: ``True`` for the first chunk of a part, in which case
	            the leading line break is skipped.
	        :returns: A tuple ``(chunk, del_index, more_data)``: the body bytes,
	            the number of leading bytes the caller should drop from the
	            buffer, and whether the part continues.
	        """
	        # Body parts must start with CRLF (or CR or LF)
	        if start:
	            match = LINE_BREAK_RE.match(data)
	            data_start = t.cast(t.Match[bytes], match).end()
	        else:
	            data_start = 0

	        boundary = b"--" + self.boundary

	        if self.buffer.find(boundary) == -1:
	            # No complete boundary in the buffer, but there may be
	            # a partial boundary at the end. As the boundary
	            # starts with either a nl or cr find the earliest and
	            # return up to that as data.
	            data_end = del_index = self.last_newline(data[data_start:]) + data_start
	            # If amount of data after last newline is far from
	            # possible length of partial boundary, we should
	            # assume that there is no partial boundary in the buffer
	            # and return all pending data.
	            if (len(data) - data_end) > len(b"\n" + boundary):
	                data_end = del_index = len(data)
	            more_data = True
	        else:
	            match = self.boundary_re.search(data)
	            if match is not None:
	                if match.group(1).startswith(b"--"):
	                    self.state = State.EPILOGUE
	                else:
	                    self.state = State.PART
	                data_end = match.start()
	                del_index = match.end()
	            else:
	                data_end = del_index = self.last_newline(data[data_start:]) + data_start
	            more_data = match is None

	        return bytes(data[data_start:data_end]), del_index, more_data


	class MultipartEncoder:
	    """Encodes multipart events into the bytes of a multipart message."""

	    def __init__(self, boundary: bytes) -> None:
	        """Create an encoder that delimits parts with ``boundary``.

	        :param boundary: The multipart boundary, without the leading ``--``.
	        """
	        self.boundary = boundary
	        self.state = State.PREAMBLE

	    def send_event(self, event: Event) -> bytes:
	        """Return the bytes for ``event`` and advance the encoder state.

	        :param event: The event to serialise.
	        :returns: The bytes to write for this event.
	        :raises ValueError: If ``event`` is not accepted in the current
	            state.
	        """
	        if isinstance(event, Preamble) and self.state == State.PREAMBLE:
	            self.state = State.PART
	            return event.data
	        elif isinstance(event, (Field, File)) and self.state in {
	            State.PREAMBLE,
	            State.PART,
	            State.DATA,
	        }:
	            # Boundary line followed by the part headers; Content-Disposition
	            # is always generated from the event itself.
	            data = b"\r\n--" + self.boundary + b"\r\n"
	            data += b'Content-Disposition: form-data; name="%s"' % event.name.encode()
	            if isinstance(event, File):
	                data += b'; filename="%s"' % event.filename.encode()
	            data += b"\r\n"
	            for name, value in t.cast(Field, event).headers:
	                # Skip any Content-Disposition in the supplied headers, it
	                # has already been written above.
	                if name.lower() != "content-disposition":
	                    data += f"{name}: {value}\r\n".encode()
	            self.state = State.DATA_START
	            return data
	        elif isinstance(event, Data) and self.state == State.DATA_START:
	            self.state = State.DATA
	            # The line break ending the header block is only written
	            # together with a non-empty first chunk.
	            if len(event.data) > 0:
	                return b"\r\n" + event.data
	            else:
	                return event.data
	        elif isinstance(event, Data) and self.state == State.DATA:
	            return event.data
	        elif isinstance(event, Epilogue):
	            self.state = State.COMPLETE
	            return b"\r\n--" + self.boundary + b"--\r\n" + event.data
	        else:
	            raise ValueError(f"Cannot generate {event} in state: {self.state}")