import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react'; import Button from '@mui/material/Button'; import Typography from '@mui/material/Typography'; import InputLabel from '@mui/material/InputLabel'; import FormControl from '@mui/material/FormControl'; import Select, {SelectChangeEvent} from '@mui/material/Select'; import MenuItem from '@mui/material/MenuItem'; import Stack from '@mui/material/Stack'; import seamlessLogoUrl from './assets/seamless.svg'; import { AgentCapabilities, BaseResponse, BrowserAudioStreamConfig, DynamicConfig, PartialDynamicConfig, SUPPORTED_INPUT_SOURCES, SUPPORTED_OUTPUT_MODES, ServerExceptionData, ServerSpeechData, ServerState, ServerTextData, StartStreamEventConfig, StreamingStatus, SupportedInputSource, SupportedOutputMode, TranslationSentences, } from './types/StreamingTypes'; import FormLabel from '@mui/material/FormLabel'; import RadioGroup from '@mui/material/RadioGroup'; import FormControlLabel from '@mui/material/FormControlLabel'; import Radio from '@mui/material/Radio'; import './StreamingInterface.css'; import RoomConfig from './RoomConfig'; import Divider from '@mui/material/Divider'; import {useSocket} from './useSocket'; import {RoomState} from './types/RoomState'; import useStable from './useStable'; import float32To16BitPCM from './float32To16BitPCM'; import createBufferedSpeechPlayer from './createBufferedSpeechPlayer'; import Checkbox from '@mui/material/Checkbox'; import Alert from '@mui/material/Alert'; import isScrolledToDocumentBottom from './isScrolledToDocumentBottom'; import Box from '@mui/material/Box'; import Slider from '@mui/material/Slider'; import VolumeDown from '@mui/icons-material/VolumeDown'; import VolumeUp from '@mui/icons-material/VolumeUp'; import Mic from '@mui/icons-material/Mic'; import MicOff from '@mui/icons-material/MicOff'; import XRDialog from './react-xr/XRDialog'; import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData'; import { sliceTranslationSentencesUpToIndex, getTotalSentencesLength, } from './sliceTranslationSentencesUtils'; import Blink from './Blink'; import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval'; import {getURLParams} from './URLParams'; import debug from './debug'; import DebugSection from './DebugSection'; import Switch from '@mui/material/Switch'; import Grid from '@mui/material/Grid'; import {getLanguageFromThreeLetterCode} from './languageLookup'; import HeadphonesIcon from '@mui/icons-material/Headphones'; const AUDIO_STREAM_DEFAULTS = { userMedia: { echoCancellation: false, noiseSuppression: true, }, displayMedia: { echoCancellation: false, noiseSuppression: false, }, } as const; async function requestUserMediaAudioStream( config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['userMedia'], ) { const stream = await navigator.mediaDevices.getUserMedia({ audio: {...config, channelCount: 1}, }); console.debug( '[requestUserMediaAudioStream] stream created with settings:', stream.getAudioTracks()?.[0]?.getSettings(), ); return stream; } async function requestDisplayMediaAudioStream( config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['displayMedia'], ) { const stream = await navigator.mediaDevices.getDisplayMedia({ audio: {...config, channelCount: 1}, }); console.debug( '[requestDisplayMediaAudioStream] stream created with settings:', stream.getAudioTracks()?.[0]?.getSettings(), ); return stream; } const buttonLabelMap: {[key in StreamingStatus]: string} = { stopped: 'Start Streaming', running: 'Stop Streaming', starting: 'Starting...', }; const BUFFER_LIMIT = 1; const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36; const GAIN_MULTIPLIER_OVER_1 = 3; const getGainScaledValue = (value) => value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value; const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2; const MAX_SERVER_EXCEPTIONS_TRACKED = 500; export const TYPING_ANIMATION_DELAY_MS = 6; export default function StreamingInterface() { const urlParams = getURLParams(); const debugParam = urlParams.debug; const [animateTextDisplay, setAnimateTextDisplay] = useState<boolean>( urlParams.animateTextDisplay, ); const socketObject = useSocket(); const {socket, clientID} = socketObject; const [serverState, setServerState] = useState<ServerState | null>(null); const [agent, setAgent] = useState<AgentCapabilities | null>(null); const model = agent?.name ?? null; const agentsCapabilities: Array<AgentCapabilities> = serverState?.agentsCapabilities ?? []; const currentAgent: AgentCapabilities | null = agentsCapabilities.find((agent) => agent.name === model) ?? null; const [serverExceptions, setServerExceptions] = useState< Array<ServerExceptionData> >([]); const [roomState, setRoomState] = useState<RoomState | null>(null); const roomID = roomState?.room_id ?? null; const isSpeaker = (clientID != null && roomState?.speakers.includes(clientID)) ?? false; const isListener = (clientID != null && roomState?.listeners.includes(clientID)) ?? false; const [streamingStatus, setStreamingStatus] = useState<StreamingStatus>('stopped'); const isStreamConfiguredRef = useRef<boolean>(false); const [hasMaxSpeakers, setHasMaxSpeakers] = useState<boolean>(false); const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t'); const [inputSource, setInputSource] = useState<SupportedInputSource>('userMedia'); const [enableNoiseSuppression, setEnableNoiseSuppression] = useState< boolean | null >(null); const [enableEchoCancellation, setEnableEchoCancellation] = useState< boolean | null >(null); // Dynamic Params: const [targetLang, setTargetLang] = useState<string | null>(null); const [enableExpressive, setEnableExpressive] = useState<boolean | null>( null, ); const [serverDebugFlag, setServerDebugFlag] = useState<boolean>( debugParam ?? false, ); const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]); const [ translationSentencesAnimatedIndex, setTranslationSentencesAnimatedIndex, ] = useState<number>(0); const lastTranslationResultRef = useRef<HTMLDivElement | null>(null); const [inputStream, setInputStream] = useState<MediaStream | null>(null); const [inputStreamSource, setInputStreamSource] = useState<MediaStreamAudioSourceNode | null>(null); const audioContext = useStable<AudioContext>(() => new AudioContext()); const [scriptNodeProcessor, setScriptNodeProcessor] = useState<ScriptProcessorNode | null>(null); const [muted, setMuted] = useState<boolean>(false); // The onaudioprocess script needs an up-to-date reference to the muted state, so // we use a ref here and keep it in sync via useEffect const mutedRef = useRef<boolean>(muted); useEffect(() => { mutedRef.current = muted; }, [muted]); const [gain, setGain] = useState<number>(1); const isScrolledToBottomRef = useRef<boolean>(isScrolledToDocumentBottom()); // Some config options must be set when starting streaming and cannot be chaned dynamically. // This controls whether they are disabled or not const streamFixedConfigOptionsDisabled = streamingStatus !== 'stopped' || roomID == null; const bufferedSpeechPlayer = useStable(() => { const player = createBufferedSpeechPlayer({ onStarted: () => { console.debug('📢 PLAYBACK STARTED 📢'); }, onEnded: () => { console.debug('🛑 PLAYBACK ENDED 🛑'); }, }); // Start the player now so it eagerly plays audio when it arrives player.start(); return player; }); const translationSentencesBase: TranslationSentences = getTranslationSentencesFromReceivedData(receivedData); const translationSentencesBaseTotalLength = getTotalSentencesLength( translationSentencesBase, ); const translationSentences: TranslationSentences = animateTextDisplay ? sliceTranslationSentencesUpToIndex( translationSentencesBase, translationSentencesAnimatedIndex, ) : translationSentencesBase; // We want the blinking cursor to show before any text has arrived, so let's add an empty string so that the cursor shows up const translationSentencesWithEmptyStartingString = streamingStatus === 'running' && translationSentences.length === 0 ? [''] : translationSentences; /****************************************** * Event Handlers ******************************************/ const setAgentAndUpdateParams = useCallback( (newAgent: AgentCapabilities | null) => { setAgent((prevAgent) => { if (prevAgent?.name !== newAgent?.name) { setTargetLang(newAgent?.targetLangs[0] ?? null); setEnableExpressive(null); } return newAgent; }); }, [], ); const onSetDynamicConfig = useCallback( async (partialConfig: PartialDynamicConfig) => { return new Promise<void>((resolve, reject) => { if (socket == null) { reject(new Error('[onSetDynamicConfig] socket is null ')); return; } socket.emit( 'set_dynamic_config', partialConfig, (result: BaseResponse) => { console.log('[emit result: set_dynamic_config]', result); if (result.status === 'ok') { resolve(); } else { reject(); } }, ); }); }, [socket], ); const configureStreamAsync = ({sampleRate}: {sampleRate: number}) => { return new Promise<void>((resolve, reject) => { if (socket == null) { reject(new Error('[configureStreamAsync] socket is null ')); return; } const modelName = agent?.name ?? null; if (modelName == null) { reject(new Error('[configureStreamAsync] modelName is null ')); return; } const config: StartStreamEventConfig = { event: 'config', rate: sampleRate, model_name: modelName, debug: serverDebugFlag, // synchronous processing isn't implemented on the v2 pubsub server, so hardcode this to true async_processing: true, buffer_limit: BUFFER_LIMIT, model_type: outputMode, }; console.log('[configureStreamAsync] sending config', config); socket.emit('configure_stream', config, (statusObject) => { setHasMaxSpeakers(statusObject.message === 'max_speakers') if (statusObject.status === 'ok') { isStreamConfiguredRef.current = true; console.debug( '[configureStreamAsync] stream configured!', statusObject, ); resolve(); } else { isStreamConfiguredRef.current = false; reject( new Error( `[configureStreamAsync] configure_stream returned status: ${statusObject.status}`, ), ); return; } }); }); }; const startStreaming = async () => { if (streamingStatus !== 'stopped') { console.warn( `Attempting to start stream when status is ${streamingStatus}`, ); return; } setStreamingStatus('starting'); if (audioContext.state === 'suspended') { console.warn('audioContext was suspended! resuming...'); await audioContext.resume(); } let stream: MediaStream | null = null; try { if (inputSource === 'userMedia') { stream = await requestUserMediaAudioStream({ noiseSuppression: enableNoiseSuppression ?? AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression, echoCancellation: enableEchoCancellation ?? AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation, }); } else if (inputSource === 'displayMedia') { stream = await requestDisplayMediaAudioStream({ noiseSuppression: enableNoiseSuppression ?? AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression, echoCancellation: enableEchoCancellation ?? AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation, }); } else { throw new Error(`Unsupported input source requested: ${inputSource}`); } setInputStream(stream); } catch (e) { console.error('[startStreaming] media stream request failed:', e); setStreamingStatus('stopped'); return; } const mediaStreamSource = audioContext.createMediaStreamSource(stream); setInputStreamSource(mediaStreamSource); /** * NOTE: This currently uses a deprecated way of processing the audio (createScriptProcessor), but * which is easy and convenient for our purposes. * * Documentation for the deprecated way of doing it is here: https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor * * In an ideal world this would be migrated to something like this SO answer: https://stackoverflow.com/a/65448287 */ const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1); setScriptNodeProcessor(scriptProcessor); scriptProcessor.onaudioprocess = (event) => { if (isStreamConfiguredRef.current === false) { console.debug('[onaudioprocess] stream is not configured yet!'); return; } if (socket == null) { console.warn('[onaudioprocess] socket is null in onaudioprocess'); return; } if (mutedRef.current) { // We still want to send audio to the server when we're muted to ensure we // get any remaining audio back from the server, so let's pass an array length 1 with a value of 0 const mostlyEmptyInt16Array = new Int16Array(1); socket.emit('incoming_audio', mostlyEmptyInt16Array); } else { const float32Audio = event.inputBuffer.getChannelData(0); const pcm16Audio = float32To16BitPCM(float32Audio); socket.emit('incoming_audio', pcm16Audio); } debug()?.sentAudio(event); }; mediaStreamSource.connect(scriptProcessor); scriptProcessor.connect(audioContext.destination); bufferedSpeechPlayer.start(); try { if (targetLang == null) { throw new Error('[startStreaming] targetLang cannot be nullish'); } // When we are starting the stream we want to pass all the dynamic config values // available before actually configuring and starting the stream const fullDynamicConfig: DynamicConfig = { targetLanguage: targetLang, expressive: enableExpressive, }; await onSetDynamicConfig(fullDynamicConfig); // NOTE: this needs to be the *audioContext* sample rate, not the sample rate of the input stream. Not entirely sure why. await configureStreamAsync({ sampleRate: audioContext.sampleRate, }); } catch (e) { console.error('configureStreamAsync failed', e); setStreamingStatus('stopped'); return; } setStreamingStatus('running'); }; const stopStreaming = useCallback(async () => { if (streamingStatus === 'stopped') { console.warn( `Attempting to stop stream when status is ${streamingStatus}`, ); return; } // Stop the speech playback right away bufferedSpeechPlayer.stop(); if (inputStreamSource == null || scriptNodeProcessor == null) { console.error( 'inputStreamSource || scriptNodeProcessor is null in stopStreaming', ); } else { inputStreamSource.disconnect(scriptNodeProcessor); scriptNodeProcessor.disconnect(audioContext.destination); // Release the mic input so we stop showing the red recording icon in the browser inputStream?.getTracks().forEach((track) => track.stop()); } if (socket == null) { console.warn('Unable to emit stop_stream because socket is null'); } else { socket.emit('stop_stream', (result) => { console.debug('[emit result: stop_stream]', result); }); } setStreamingStatus('stopped'); }, [ audioContext.destination, bufferedSpeechPlayer, inputStream, inputStreamSource, scriptNodeProcessor, socket, streamingStatus, ]); const onClearTranscriptForAll = useCallback(() => { if (socket != null) { socket.emit('clear_transcript_for_all'); } }, [socket]); /****************************************** * Effects ******************************************/ useEffect(() => { if (socket == null) { return; } const onRoomStateUpdate = (roomState: RoomState) => { setRoomState(roomState); }; socket.on('room_state_update', onRoomStateUpdate); return () => { socket.off('room_state_update', onRoomStateUpdate); }; }, [socket]); useEffect(() => { if (socket != null) { const onTranslationText = (data: ServerTextData) => { setReceivedData((prev) => [...prev, data]); debug()?.receivedText(data.payload); }; const onTranslationSpeech = (data: ServerSpeechData) => { bufferedSpeechPlayer.addAudioToBuffer(data.payload, data.sample_rate); }; socket.on('translation_text', onTranslationText); socket.on('translation_speech', onTranslationSpeech); return () => { socket.off('translation_text', onTranslationText); socket.off('translation_speech', onTranslationSpeech); }; } }, [bufferedSpeechPlayer, socket]); useEffect(() => { if (socket != null) { const onServerStateUpdate = (newServerState: ServerState) => { setServerState(newServerState); // If a client creates a server lock, we want to stop streaming if we're not them if ( newServerState.serverLock?.isActive === true && newServerState.serverLock?.clientID !== clientID && streamingStatus === 'running' ) { stopStreaming(); } const firstAgentNullable = newServerState.agentsCapabilities[0]; if (agent == null && firstAgentNullable != null) { setAgentAndUpdateParams(firstAgentNullable); } }; socket.on('server_state_update', onServerStateUpdate); return () => { socket.off('server_state_update', onServerStateUpdate); }; } }, [ agent, clientID, setAgentAndUpdateParams, socket, stopStreaming, streamingStatus, ]); useEffect(() => { if (socket != null) { const onServerException = ( exceptionDataWithoutClientTime: ServerExceptionData, ) => { const exceptionData = { ...exceptionDataWithoutClientTime, timeStringClient: new Date( exceptionDataWithoutClientTime['timeEpochMs'], ).toLocaleString(), }; setServerExceptions((prev) => [exceptionData, ...prev].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED), ); console.error( `[server_exception] The server encountered an exception: ${exceptionData['message']}`, exceptionData, ); }; socket.on('server_exception', onServerException); return () => { socket.off('server_exception', onServerException); }; } }, [socket]); useEffect(() => { if (socket != null) { const onClearTranscript = () => { setReceivedData([]); setTranslationSentencesAnimatedIndex(0); }; socket.on('clear_transcript', onClearTranscript); return () => { socket.off('clear_transcript', onClearTranscript); }; } }, [socket]); useEffect(() => { const onScroll = () => { if (isScrolledToDocumentBottom(SCROLLED_TO_BOTTOM_THRESHOLD_PX)) { isScrolledToBottomRef.current = true; return; } isScrolledToBottomRef.current = false; return; }; document.addEventListener('scroll', onScroll); return () => { document.removeEventListener('scroll', onScroll); }; }, []); useLayoutEffect(() => { if ( lastTranslationResultRef.current != null && isScrolledToBottomRef.current ) { // Scroll the div to the most recent entry lastTranslationResultRef.current.scrollIntoView(); } // Run the effect every time data is received, so that // we scroll to the bottom even if we're just adding text to // a pre-existing chunk }, [receivedData]); useEffect(() => { if (!animateTextDisplay) { return; } if ( translationSentencesAnimatedIndex < translationSentencesBaseTotalLength ) { const timeout = setTimeout(() => { setTranslationSentencesAnimatedIndex((prev) => prev + 1); debug()?.startRenderText(); }, TYPING_ANIMATION_DELAY_MS); return () => clearTimeout(timeout); } else { debug()?.endRenderText(); } }, [ animateTextDisplay, translationSentencesAnimatedIndex, translationSentencesBaseTotalLength, ]); /****************************************** * Sub-components ******************************************/ const volumeSliderNode = ( <Stack spacing={2} direction="row" sx={{mb: 1, width: '100%'}} alignItems="center"> <VolumeDown color="primary" /> <Slider aria-label="Volume" defaultValue={1} scale={getGainScaledValue} min={0} max={3} step={0.1} marks={[ {value: 0, label: '0%'}, {value: 1, label: '100%'}, {value: 2, label: '400%'}, {value: 3, label: '700%'}, ]} valueLabelFormat={(value) => `${(value * 100).toFixed(0)}%`} valueLabelDisplay="auto" value={gain} onChange={(_event: Event, newValue: number | number[]) => { if (typeof newValue === 'number') { const scaledGain = getGainScaledValue(newValue); // We want the actual gain node to use the scaled value bufferedSpeechPlayer.setGain(scaledGain); // But we want react state to keep track of the non-scaled value setGain(newValue); } else { console.error( `[volume slider] Unexpected non-number value: ${newValue}`, ); } }} /> <VolumeUp color="primary" /> </Stack> ); const xrDialogComponent = ( <XRDialog animateTextDisplay={ animateTextDisplay && translationSentencesAnimatedIndex == translationSentencesBaseTotalLength } bufferedSpeechPlayer={bufferedSpeechPlayer} translationSentences={translationSentences} roomState={roomState} roomID={roomID} startStreaming={startStreaming} stopStreaming={stopStreaming} debugParam={debugParam} onARHidden={() => { setAnimateTextDisplay(urlParams.animateTextDisplay); }} onARVisible={() => setAnimateTextDisplay(false)} /> ); return ( <div className="app-wrapper-sra"> <Box // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore Not sure why it's complaining about complexity here sx={{width: '100%', maxWidth: '660px', minWidth: '320px'}}> <div className="main-container-sra"> <div className="top-section-sra horizontal-padding-sra"> <div className="header-container-sra"> <img src={seamlessLogoUrl} className="header-icon-sra" alt="Seamless Translation Logo" height={24} width={24} /> <div> <Typography variant="h1" sx={{color: '#65676B'}}> PulseLive Seamless Event Hosting </Typography> </div> </div> <div className="header-container-sra"> <div> <Typography variant="body2" sx={{color: '#65676B'}}> Welcome ! This space is limited to one commentator at a time. Use headphones if you are both host and listener to prevent feedback. <br/> PulseLiveSeamlessStreaming is based on the Meta SeamlessStreaming research model and is not released for production deployment. The streaming quality is closely related to proper VAD segmentation. <br/> It works best if you pause every couple of sentences, or you may wish adjust the VAD threshold in the model config. The real-time performance will degrade if you try translating multiple commentators at the same time. </Typography> </div> </div> <Stack spacing="22px" direction="column"> <Box> <RoomConfig roomState={roomState} serverState={serverState} streamingStatus={streamingStatus} onJoinRoomOrUpdateRoles={() => { // If the user has switched from speaker to listener we need to tell the // player to play eagerly, since currently the listener doesn't have any stop/start controls bufferedSpeechPlayer.start(); }} /> {isListener && !isSpeaker && ( <Box sx={{ paddingX: 6, paddingBottom: 2, marginY: 2, display: 'flex', flexDirection: 'column', alignItems: 'center', }}> {volumeSliderNode} </Box> )} </Box> {isSpeaker && ( <> <Divider /> {/* <Stack spacing="12px" direction="column"> <FormLabel id="output-modes-radio-group-label"> Model </FormLabel> <FormControl disabled={ streamFixedConfigOptionsDisabled || agentsCapabilities.length === 0 } fullWidth sx={{minWidth: '14em'}}> <InputLabel id="model-selector-input-label"> Model </InputLabel> <Select labelId="model-selector-input-label" label="Model" onChange={(e: SelectChangeEvent) => { const newAgent = agentsCapabilities.find( (agent) => e.target.value === agent.name, ) ?? null; if (newAgent == null) { console.error( 'Unable to find agent with name', e.target.value, ); } setAgentAndUpdateParams(newAgent); }} value={model ?? ''}> {agentsCapabilities.map((agent) => ( <MenuItem value={agent.name} key={agent.name}> {agent.name} </MenuItem> ))} </Select> </FormControl> </Stack> */} <Stack spacing={0.5}> <FormLabel id="output-modes-radio-group-label"> Output </FormLabel> <Box sx={{paddingTop: 2, paddingBottom: 1}}> <FormControl fullWidth sx={{minWidth: '14em'}}> <InputLabel id="target-selector-input-label"> Target Language </InputLabel> <Select labelId="target-selector-input-label" label="Target Language" onChange={(e: SelectChangeEvent) => { setTargetLang(e.target.value); onSetDynamicConfig({ targetLanguage: e.target.value, }); }} value={targetLang ?? ''}> {currentAgent?.targetLangs.map((langCode) => ( <MenuItem value={langCode} key={langCode}> {getLanguageFromThreeLetterCode(langCode) != null ? `${getLanguageFromThreeLetterCode( langCode, )} (${langCode})` : langCode} </MenuItem> ))} </Select> </FormControl> </Box> <Grid container> <Grid item xs={12} sm={4}> <FormControl disabled={streamFixedConfigOptionsDisabled}> <RadioGroup aria-labelledby="output-modes-radio-group-label" value={outputMode} onChange={(e) => setOutputMode( e.target.value as SupportedOutputMode, ) } name="output-modes-radio-buttons-group"> { // TODO: Use supported modalities from agentCapabilities SUPPORTED_OUTPUT_MODES.map(({value, label}) => ( <FormControlLabel key={value} value={value} control={<Radio />} label={label} /> )) } </RadioGroup> </FormControl> </Grid> <Grid item xs={12} sm={8}> <Stack direction="column" spacing={1} alignItems="flex-start" sx={{flexGrow: 1}}> {currentAgent?.dynamicParams?.includes( 'expressive', ) && ( <FormControlLabel control={ <Switch checked={enableExpressive ?? false} onChange={( event: React.ChangeEvent<HTMLInputElement>, ) => { const newValue = event.target.checked; setEnableExpressive(newValue); onSetDynamicConfig({ expressive: newValue, }); }} /> } label="Expressive" /> )} {isListener && ( <Box sx={{ flexGrow: 1, paddingX: 1.5, paddingY: 1.5, width: '100%', }}> {volumeSliderNode} </Box> )} </Stack> </Grid> </Grid> </Stack> <Stack direction="row" spacing={2} justifyContent="space-between"> <Box sx={{flex: 1}}> <FormControl disabled={streamFixedConfigOptionsDisabled}> <FormLabel id="input-source-radio-group-label"> Input Source </FormLabel> <RadioGroup aria-labelledby="input-source-radio-group-label" value={inputSource} onChange={(e: React.ChangeEvent<HTMLInputElement>) => setInputSource( e.target.value as SupportedInputSource, ) } name="input-source-radio-buttons-group"> {SUPPORTED_INPUT_SOURCES.map(({label, value}) => ( <FormControlLabel key={value} value={value} control={<Radio />} label={label} /> ))} </RadioGroup> </FormControl> </Box> <Box sx={{flex: 1, flexGrow: 2}}> <FormControl disabled={streamFixedConfigOptionsDisabled}> <FormLabel>Options</FormLabel> <FormControlLabel control={ <Checkbox checked={ enableNoiseSuppression ?? AUDIO_STREAM_DEFAULTS[inputSource] .noiseSuppression } onChange={( event: React.ChangeEvent<HTMLInputElement>, ) => setEnableNoiseSuppression(event.target.checked) } /> } label="Noise Suppression" /> <FormControlLabel control={ <Checkbox checked={ enableEchoCancellation ?? AUDIO_STREAM_DEFAULTS[inputSource] .echoCancellation } onChange={( event: React.ChangeEvent<HTMLInputElement>, ) => setEnableEchoCancellation(event.target.checked) } /> } label="Echo Cancellation (not recommended)" /> <FormControlLabel control={ <Checkbox checked={serverDebugFlag} onChange={( event: React.ChangeEvent<HTMLInputElement>, ) => setServerDebugFlag(event.target.checked)} /> } label="Enable Server Debugging" /> </FormControl> </Box> </Stack> {isSpeaker && isListener && inputSource === 'userMedia' && !enableEchoCancellation && gain !== 0 && ( <div> <Alert severity="warning" icon={<HeadphonesIcon />}> Headphones required to prevent feedback. </Alert> </div> )} {isSpeaker && enableEchoCancellation && ( <div> <Alert severity="warning"> We don't recommend using echo cancellation as it may distort the input audio. If possible, use headphones and disable echo cancellation instead. </Alert> </div> )} <Stack direction="row" spacing={2}> {streamingStatus === 'stopped' ? ( <Button variant="contained" onClick={startStreaming} disabled={ roomID == null || // Prevent users from starting streaming if there is a server lock with an active session (serverState?.serverLock?.isActive === true && serverState.serverLock.clientID !== clientID) }> {buttonLabelMap[streamingStatus]} </Button> ) : ( <Button variant="contained" color={ streamingStatus === 'running' ? 'error' : 'primary' } disabled={ streamingStatus === 'starting' || roomID == null } onClick={stopStreaming}> {buttonLabelMap[streamingStatus]} </Button> )} <Box> <Button variant="contained" aria-label={muted ? 'Unmute' : 'Mute'} color={muted ? 'info' : 'primary'} onClick={() => setMuted((prev) => !prev)} sx={{ borderRadius: 100, paddingX: 0, minWidth: '36px', }}> {muted ? <MicOff /> : <Mic />} </Button> </Box> {roomID == null ? null : ( <Box sx={{ flexGrow: 1, display: 'flex', justifyContent: 'flex-end', }}> {xrDialogComponent} </Box> )} </Stack> {serverExceptions.length > 0 && ( <div> <Alert severity="error"> {`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`} </Alert> </div> )} {serverState != null && hasMaxSpeakers && ( <div> <Alert severity="error"> {`Maximum number of speakers reached. Please try again at a later time.`} </Alert> </div> )} {serverState != null && serverState.totalActiveTranscoders >= TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && ( <div> <Alert severity="warning"> {`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`} </Alert> </div> )} {serverState?.serverLock != null && serverState.serverLock.clientID !== clientID && ( <div> <Alert severity="warning"> {`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`} </Alert> </div> )} </> )} </Stack> {isListener && !isSpeaker && ( <Box sx={{marginBottom: 1, marginTop: 2}}> {xrDialogComponent} </Box> )} </div> {debugParam && roomID != null && <DebugSection />} <div className="translation-text-container-sra horizontal-padding-sra"> <Stack direction="row" spacing={2} sx={{mb: '16px', alignItems: 'center'}}> <Typography variant="h1" sx={{fontWeight: 700, flexGrow: 1}}> Transcript </Typography> {isSpeaker && ( <Button variant="text" size="small" onClick={onClearTranscriptForAll}> Clear Transcript for All </Button> )} </Stack> <Stack direction="row"> <div className="translation-text-sra"> {translationSentencesWithEmptyStartingString.map( (sentence, index, arr) => { const isLast = index === arr.length - 1; const maybeRef = isLast ? {ref: lastTranslationResultRef} : {}; return ( <div className="text-chunk-sra" key={index} {...maybeRef}> <Typography variant="body1"> {sentence} {animateTextDisplay && isLast && ( <Blink intervalMs={CURSOR_BLINK_INTERVAL_MS} shouldBlink={ (roomState?.activeTranscoders ?? 0) > 0 }> <Typography component="span" variant="body1" sx={{ display: 'inline-block', transform: 'scaleY(1.25) translateY(-1px)', }}> {'|'} </Typography> </Blink> )} </Typography> </div> ); }, )} </div> </Stack> </div> </div> </Box> </div> ); }