File size: 1,662 Bytes
b2ecf7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import type { TaskDataCustom } from "../Types";

/**
 * Static metadata describing the Text-to-Speech (TTS) task.
 *
 * The object lists example datasets, a demo input/output pair, evaluation
 * metrics, models, spaces, a prose summary, widget models and a YouTube
 * video id. All values are plain literals; nothing is computed at load time.
 *
 * NOTE(review): how each field is rendered is decided by the consumer of
 * `TaskDataCustom`, which is not visible from this file.
 */
const taskData: TaskDataCustom = {
	// Example datasets associated with the task.
	datasets: [
		{
			description: "Thousands of short audio clips of a single speaker.",
			id: "lj_speech",
		},
		{
			description: "Multi-speaker English dataset.",
			id: "LibriTTS",
		},
	],
	// Demo pair: one text input and one audio output.
	demo: {
		inputs: [
			{
				label: "Input",
				content: "I love audio models on the Hub!",
				type: "text",
			},
		],
		outputs: [
			{
				filename: "audio.wav",
				type: "audio",
			},
		],
	},
	// Metrics used to evaluate generated speech.
	metrics: [
		{
			description: "The Mel Cepstral Distortion (MCD) metric is used to calculate the quality of generated speech.",
			id: "mel cepstral distortion",
		},
	],
	// Example models for the task.
	models: [
		{
			description: "A powerful TTS model.",
			id: "suno/bark",
		},
		{
			description: "A massively multi-lingual TTS model.",
			id: "facebook/mms-tts",
		},
		{
			description: "An end-to-end speech synthesis model.",
			id: "microsoft/speecht5_tts",
		},
	],
	// Example applications (spaces) for the task.
	spaces: [
		{
			// Grammar fix: "for generate" -> "for generating" (user-facing text).
			description: "An application for generating highly realistic, multilingual speech.",
			id: "suno/bark",
		},
		{
			description: "An application that contains multiple speech synthesis models for various languages and accents.",
			id: "coqui/CoquiTTS",
		},
		{
			description: "An application that synthesizes speech for various speaker types.",
			id: "Matthijs/speecht5-tts-demo",
		},
	],
	summary:
		"Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
	// Presumably the model(s) backing the interactive widget — confirm against TaskDataCustom.
	widgetModels: ["microsoft/speecht5_tts"],
	youtubeId: "NW62DpzJ274",
};

export default taskData;