import type { TaskDataCustom } from "../Types";

/**
 * Static task data for Automatic Speech Recognition (ASR).
 *
 * Lists example datasets, a demo input/output pair, metrics, models and
 * Spaces, together with a short task summary, the widget model ids and a
 * YouTube video id.
 */
const taskData: TaskDataCustom = {
	datasets: [
		{
			description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
			id: "mozilla-foundation/common_voice_13_0",
		},
		{
			description: "An English dataset with 1,000 hours of data.",
			id: "librispeech_asr",
		},
		{
			description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
			id: "openslr",
		},
	],
	demo: {
		inputs: [
			{
				filename: "input.flac",
				type: "audio",
			},
		],
		outputs: [
			{
				// Upper-case rendering of the start of the transcript below:
				// GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES I
				label: "Transcript",
				content: "Going along slushy country roads and speaking to damp audiences in...",
				type: "text",
			},
		],
	},
	// NOTE(review): both metric descriptions are empty strings in the original
	// data — confirm whether that is intentional before filling them in.
	metrics: [
		{
			description: "",
			id: "wer",
		},
		{
			description: "",
			id: "cer",
		},
	],
	models: [
		{
			description: "A powerful ASR model by OpenAI.",
			id: "openai/whisper-large-v3",
		},
		{
			description: "A good generic ASR model by MetaAI.",
			id: "facebook/wav2vec2-base-960h",
		},
		{
			description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
			id: "facebook/s2t-small-mustc-en-fr-st",
		},
	],
	spaces: [
		{
			description: "A powerful general-purpose speech recognition application.",
			id: "hf-audio/whisper-large-v3",
		},
		{
			description: "Fastest speech recognition application.",
			id: "sanchit-gandhi/whisper-jax",
		},
		{
			description: "A high quality speech and text translation model by Meta.",
			id: "facebook/seamless_m4t",
		},
	],
	summary:
		"Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
	widgetModels: ["openai/whisper-large-v3"],
	youtubeId: "TksaY_FDgnk",
};

export default taskData;