Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import outetts

# Model configuration for the OuteTTS 0.2 500M checkpoint.
# Languages supported in v0.2: en, zh, ja, ko.
model_config = outetts.HFModelConfig_v1(
    model_path="OuteAI/OuteTTS-0.2-500M",
    language="en",
)

# Build the interface for model version 0.2 from the configuration above.
interface = outetts.InterfaceHF(model_version="0.2", cfg=model_config)

# Optional ways to obtain a speaker profile instead of a default preset.
#
# Create a profile from a 10-15 second audio clip and its transcript:
#     speaker = interface.create_speaker(
#         audio_path="path/to/audio/file",
#         transcript="Transcription of the audio file."
#     )
#
# Save a profile to disk and load it back later:
#     interface.save_speaker(speaker, "speaker.json")
#     speaker = interface.load_speaker("speaker.json")

# Show the default speakers, then load the "male_1" preset.
interface.print_default_speakers()
speaker = interface.load_default_speaker(name="male_1")

text_to_speak = "Speech synthesis is the artificial production of human speech. A computer system used for this purpose is called a speech synthesizer, and it can be implemented in software or hardware products."

generation_kwargs = {
    "text": text_to_speak,
    # Lower temperatures may give a more stable tone; higher values can
    # introduce more varied and expressive speech.
    "temperature": 0.1,
    "repetition_penalty": 1.1,
    "max_length": 4096,
    # Optional: a speaker profile keeps voice characteristics consistent.
    # Without one, the model generates a voice with random characteristics.
    "speaker": speaker,
}
output = interface.generate(**generation_kwargs)

# Write the synthesized speech to a WAV file.
output.save("output.wav")

# Optional: play the synthesized speech.
# output.play()