---
library_name: transformers.js
license: cc-by-nc-4.0
language:
- en
- zh
- ja
- ko
base_model:
- OuteAI/OuteTTS-0.2-500M
pipeline_tag: text-to-speech
---

## Usage

First, install the `outetts` library from [NPM](https://www.npmjs.com/package/outetts) with:

```sh
npm i outetts
```

You can then generate speech as follows:

```js
// Example: synthesize speech with OuteTTS-0.2-500M and write it to a WAV file.
// This script uses `import` and top-level `await`, so run it as an ES module.
import { HFModelConfig_v1, InterfaceHF } from "outetts";

// Configure the model
const model_config = new HFModelConfig_v1({
  model_path: "onnx-community/OuteTTS-0.2-500M",
  language: "en", // Supported languages in v0.2: en, zh, ja, ko
  dtype: "fp32", // Supported dtypes: "fp32", "fp16", "q8", "q4", "q4f16"
  // device: "wasm", // Supported devices: "webgpu", "wasm" (browser) or "cpu", "cuda", "dml" (Node.js, OS-specific)
});

// Initialize the interface
const tts_interface = await InterfaceHF({ model_version: "0.2", cfg: model_config });

// Print available default speakers
tts_interface.print_default_speakers();

// Load a default speaker
const speaker = tts_interface.load_default_speaker("male_1");

// Generate speech
const output = await tts_interface.generate({
  text: "Speech synthesis is the artificial production of human speech.",
  temperature: 0.1, // Lower temperature values may result in a more stable tone
  repetition_penalty: 1.1,
  max_length: 4096,

  // Optional: Use a speaker profile for consistent voice characteristics
  // Without a speaker profile, the model will generate a voice with random characteristics
  speaker,
});

// Save the synthesized speech to a file
output.save("output.wav");
```

<audio controls src="https://cdn-uploads.huggingface.co/production/uploads/61b253b7ac5ecaae3d1efe0c/6gYU9b43qxPm9Tbg8z4Pa.wav"></audio>