GameServerX

Running

App Files Files Community

GameServerX / MLPY /Lib /site-packages /torchaudio /io /_effector.py

Kano001

Upload 462 files

864affd verified 10 months ago

raw

history blame

12.2 kB

	import io
	from typing import Iterator, List, Optional

	import torch
	from torch import Tensor

	from torio.io._streaming_media_decoder import _get_afilter_desc, StreamingMediaDecoder as StreamReader
	from torio.io._streaming_media_encoder import CodecConfig, StreamingMediaEncoder as StreamWriter


	class _StreamingIOBuffer:
	"""Streaming Bytes IO buffer. Data are dropped when read."""

	def __init__(self):
	self._buffer: List(bytes) = []

	def write(self, b: bytes):
	if b:
	self._buffer.append(b)
	return len(b)

	def pop(self, n):
	"""Pop the oldest byte string. It does not necessary return the requested amount"""
	if not self._buffer:
	return b""
	if len(self._buffer[0]) <= n:
	return self._buffer.pop(0)
	ret = self._buffer[0][:n]
	self._buffer[0] = self._buffer[0][n:]
	return ret


	def _get_sample_fmt(dtype: torch.dtype):
	types = {
	torch.uint8: "u8",
	torch.int16: "s16",
	torch.int32: "s32",
	torch.float32: "flt",
	torch.float64: "dbl",
	}
	if dtype not in types:
	raise ValueError(f"Unsupported dtype is provided {dtype}. Supported dtypes are: {types.keys()}")
	return types[dtype]


	class _AudioStreamingEncoder:
	    """Encode a waveform lazily, producing bytes only when they are read.

	    The object exposes a ``read(n)`` method; each call feeds further slices
	    of the source tensor to the writer until some encoded bytes are
	    available (or the source is exhausted), then returns bytes from the
	    internal buffer.
	    """

	    def __init__(
	        self,
	        src: Tensor,
	        sample_rate: int,
	        effect: str,
	        muxer: str,
	        encoder: Optional[str],
	        codec_config: Optional[CodecConfig],
	        frames_per_chunk: int,
	    ):
	        self.src = src
	        self.buffer = _StreamingIOBuffer()

	        # The writer emits its encoded output into ``self.buffer``.
	        self.writer = StreamWriter(self.buffer, format=muxer)
	        stream_options = dict(
	            num_channels=src.size(1),
	            sample_rate=sample_rate,
	            format=_get_sample_fmt(src.dtype),
	            encoder=encoder,
	            filter_desc=effect,
	            codec_config=codec_config,
	        )
	        self.writer.add_audio_stream(**stream_options)
	        self.writer.open()

	        # Number of frames (rows of ``src``) fed to the writer per step.
	        self.fpc = frames_per_chunk

	        # Position of the next slice along the first axis of ``src``.
	        # -1 is a sentinel meaning the whole tensor has been consumed and
	        # the writer has been flushed and closed.
	        self.i_iter = 0

	    def read(self, n):
	        """Return up to ``n`` encoded bytes, encoding more input if needed."""
	        # Keep encoding while there is input left and nothing is buffered yet.
	        while self.i_iter >= 0 and not self.buffer._buffer:
	            start = self.i_iter
	            stop = start + self.fpc
	            self.writer.write_audio_chunk(0, self.src[start:stop])
	            self.i_iter = stop
	            if stop >= self.src.size(0):
	                # Source exhausted: finalize the stream and mark completion.
	                self.writer.flush()
	                self.writer.close()
	                self.i_iter = -1
	        return self.buffer.pop(n)


	def _encode(
	    src: Tensor,
	    sample_rate: int,
	    effect: str,
	    muxer: str,
	    encoder: Optional[str],
	    codec_config: Optional[CodecConfig],
	):
	    """Encode the whole waveform in one pass and return it as an in-memory file.

	    Returns:
	        io.BytesIO: Buffer holding the encoded data, rewound to position 0 so
	        that it can be read from the beginning.
	    """
	    encoded = io.BytesIO()
	    writer = StreamWriter(encoded, format=muxer)

	    # A single audio stream whose layout is taken from the source tensor.
	    stream_options = dict(
	        num_channels=src.size(1),
	        sample_rate=sample_rate,
	        format=_get_sample_fmt(src.dtype),
	        encoder=encoder,
	        filter_desc=effect,
	        codec_config=codec_config,
	    )
	    writer.add_audio_stream(**stream_options)

	    # All frames are written inside the writer's context manager.
	    with writer.open():
	        writer.write_audio_chunk(0, src)

	    encoded.seek(0)
	    return encoded


	def _get_muxer(dtype: torch.dtype):
	# TODO: check if this works in Windows.
	types = {
	torch.uint8: "u8",
	torch.int16: "s16le",
	torch.int32: "s32le",
	torch.float32: "f32le",
	torch.float64: "f64le",
	}
	if dtype not in types:
	raise ValueError(f"Unsupported dtype is provided {dtype}. Supported dtypes are: {types.keys()}")
	return types[dtype]


	class AudioEffector:
	    """Apply various filters and/or codecs to waveforms.

	    .. versionadded:: 2.1

	    Args:
	        effect (str or None, optional): Filter expressions or ``None`` to apply no filter.
	            See https://ffmpeg.org/ffmpeg-filters.html#Audio-Filters for the
	            details of filter syntax.

	        format (str or None, optional): When provided, encode the audio into the
	            corresponding format. Default: ``None``.

	        encoder (str or None, optional): When provided, override the encoder used
	            by the ``format``. Default: ``None``.

	        codec_config (CodecConfig or None, optional): When provided, configure the encoding codec.
	            Should be provided in conjunction with ``format`` option.

	        pad_end (bool, optional): When enabled, and if the waveform becomes shorter after applying
	            effects/codec, then pad the end with silence.

	    Raises:
	        ValueError: If ``encoder`` or ``codec_config`` is given without ``format``.

	    Example - Basic usage
	        To use ``AudioEffector``, first instantiate it with a set of
	        ``effect`` and ``format``.

	        >>> # instantiate the effector
	        >>> effector = AudioEffector(effect=..., format=...)

	        Then, use :py:meth:`~AudioEffector.apply` or :py:meth:`~AudioEffector.stream`
	        method to apply them.

	        >>> # Apply the effect to the whole waveform
	        >>> applied = effector.apply(waveform, sample_rate)

	        >>> # Apply the effect chunk-by-chunk
	        >>> for chunk in effector.stream(waveform, sample_rate, frames_per_chunk):
	        ...     ...

	    Example - Applying effects
	        Please refer to
	        https://ffmpeg.org/ffmpeg-filters.html#Filtergraph-description
	        for the overview of filter description, and
	        https://ffmpeg.org/ffmpeg-filters.html#toc-Audio-Filters
	        for the list of available filters.

	        Tempo - https://ffmpeg.org/ffmpeg-filters.html#atempo

	        >>> AudioEffector(effect="atempo=1.5")

	        Echo - https://ffmpeg.org/ffmpeg-filters.html#aecho

	        >>> AudioEffector(effect="aecho=0.8:0.88:60:0.4")

	        Flanger - https://ffmpeg.org/ffmpeg-filters.html#flanger

	        >>> AudioEffector(effect="flanger")

	        Vibrato - https://ffmpeg.org/ffmpeg-filters.html#vibrato

	        >>> AudioEffector(effect="vibrato")

	        Tremolo - https://ffmpeg.org/ffmpeg-filters.html#tremolo

	        >>> AudioEffector(effect="tremolo")

	        You can also apply multiple effects at once, by chaining
	        them with commas.

	        >>> AudioEffector(effect="atempo=1.5,aecho=0.8:0.88:60:0.4")

	    Example - Applying codec
	        One can apply codec using ``format`` argument. ``format`` can be
	        audio format or container format. If the container format supports
	        multiple encoders, you can specify it with ``encoder`` argument.

	        Wav format
	        (no compression is applied but samples are converted to
	        16-bit signed integer)

	        >>> AudioEffector(format="wav")

	        Ogg format with default encoder

	        >>> AudioEffector(format="ogg")

	        Ogg format with vorbis

	        >>> AudioEffector(format="ogg", encoder="vorbis")

	        Ogg format with opus

	        >>> AudioEffector(format="ogg", encoder="opus")

	        Webm format with opus

	        >>> AudioEffector(format="webm", encoder="opus")

	    Example - Applying codec with configuration
	        Reference: https://trac.ffmpeg.org/wiki/Encode/MP3

	        MP3 with default config

	        >>> AudioEffector(format="mp3")

	        MP3 with variable bitrate

	        >>> AudioEffector(format="mp3", codec_config=CodecConfig(qscale=5))

	        MP3 with constant bitrate

	        >>> AudioEffector(format="mp3", codec_config=CodecConfig(bit_rate=32_000))
	    """

	    def __init__(
	        self,
	        effect: Optional[str] = None,
	        format: Optional[str] = None,
	        *,
	        encoder: Optional[str] = None,
	        codec_config: Optional[CodecConfig] = None,
	        pad_end: bool = True,
	    ):
	        # ``encoder`` and ``codec_config`` only make sense when a format is being encoded.
	        if format is None:
	            if encoder is not None or codec_config is not None:
	                raise ValueError("`encoder` and/or `codec_config` options are provided without `format` option.")
	        self.effect = effect
	        self.format = format
	        self.encoder = encoder
	        self.codec_config = codec_config
	        self.pad_end = pad_end

	    def _get_reader(self, waveform, sample_rate, output_sample_rate, frames_per_chunk=None):
	        """Encode ``waveform`` and return a reader that decodes the result.

	        When ``frames_per_chunk`` is ``None`` the waveform is encoded up front
	        into memory; otherwise it is encoded on demand while the reader
	        consumes it.
	        """
	        num_frames, num_channels = waveform.shape

	        if self.format is not None:
	            muxer = self.format
	            encoder = self.encoder
	            option = {}
	            # Some formats are headerless, so we need to provide this information.
	            if self.format == "mulaw":
	                option = {"sample_rate": f"{sample_rate}", "channels": f"{num_channels}"}

	        else:  # PCM
	            muxer = _get_muxer(waveform.dtype)
	            encoder = None
	            option = {"sample_rate": f"{sample_rate}", "channels": f"{num_channels}"}

	        if frames_per_chunk is None:
	            src = _encode(waveform, sample_rate, self.effect, muxer, encoder, self.codec_config)
	        else:
	            src = _AudioStreamingEncoder(
	                waveform, sample_rate, self.effect, muxer, encoder, self.codec_config, frames_per_chunk
	            )

	        # Decode back to the input dtype/channel count, at the requested rate
	        # (the input rate unless overridden).
	        output_sr = sample_rate if output_sample_rate is None else output_sample_rate
	        filter_desc = _get_afilter_desc(output_sr, _get_sample_fmt(waveform.dtype), num_channels)
	        if self.pad_end:
	            # NOTE(review): ``num_frames`` is the frame count of the *input*; if
	            # ``output_sample_rate`` differs from ``sample_rate`` this pad target
	            # may not correspond to the input duration -- confirm intended.
	            filter_desc = f"{filter_desc},apad=whole_len={num_frames}"

	        reader = StreamReader(src, format=muxer, option=option)
	        # ``frames_per_chunk`` of ``None`` is passed on as -1.
	        reader.add_audio_stream(frames_per_chunk or -1, -1, filter_desc=filter_desc)
	        return reader

	    def apply(self, waveform: Tensor, sample_rate: int, output_sample_rate: Optional[int] = None) -> Tensor:
	        """Apply the effect and/or codecs to the whole tensor.

	        Args:
	            waveform (Tensor): The input waveform. Shape: ``(time, channel)``
	            sample_rate (int): Sample rate of the input waveform.
	            output_sample_rate (int or None, optional): Output sample rate.
	                If provided, override the output sample rate.
	                Otherwise, the resulting tensor is resampled to have
	                the same sample rate as the input.
	                Default: ``None``.

	        Returns:
	            Tensor:
	                Resulting Tensor. Shape: ``(time, channel)``. The number of frames
	                could be different from that of the input. An empty input is
	                returned unchanged.

	        Raises:
	            ValueError: If ``waveform`` is not 2D.
	        """
	        if waveform.ndim != 2:
	            raise ValueError(f"Expected the input waveform to be 2D. Found: {waveform.ndim}")

	        # Nothing to encode; hand the empty tensor back as is.
	        if waveform.numel() == 0:
	            return waveform

	        reader = self._get_reader(waveform, sample_rate, output_sample_rate)
	        reader.process_all_packets()
	        (applied,) = reader.pop_chunks()
	        return Tensor(applied)

	    def stream(
	        self, waveform: Tensor, sample_rate: int, frames_per_chunk: int, output_sample_rate: Optional[int] = None
	    ) -> Iterator[Tensor]:
	        """Apply the effect and/or codecs to the given tensor chunk by chunk.

	        This is a generator: argument validation and all processing happen
	        when the returned iterator is consumed, not when ``stream`` is called.

	        Args:
	            waveform (Tensor): The input waveform. Shape: ``(time, channel)``
	            sample_rate (int): Sample rate of the waveform.
	            frames_per_chunk (int): The number of frames to return at a time.
	            output_sample_rate (int or None, optional): Output sample rate.
	                If provided, override the output sample rate.
	                Otherwise, the resulting tensor is resampled to have
	                the same sample rate as the input.
	                Default: ``None``.

	        Returns:
	            Iterator[Tensor]:
	                Series of processed chunks. Shape: ``(time, channel)``, where
	                the number of frames matches ``frames_per_chunk`` except the
	                last chunk, which could be shorter. An empty input yields
	                no chunks.

	        Raises:
	            ValueError: If ``waveform`` is not 2D (raised on first iteration).
	        """
	        if waveform.ndim != 2:
	            raise ValueError(f"Expected the input waveform to be 2D. Found: {waveform.ndim}")

	        # Inside a generator a returned value is never seen by ``for`` loops,
	        # so simply finish without yielding anything for empty input.
	        if waveform.numel() == 0:
	            return

	        reader = self._get_reader(waveform, sample_rate, output_sample_rate, frames_per_chunk)
	        for (applied,) in reader.stream():
	            yield Tensor(applied)