File size: 1,297 Bytes
5efbc82
 
 
 
 
 
 
 
 
 
 
 
9acb9c3
 
 
9a88d9c
9acb9c3
5efbc82
9a88d9c
9acb9c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os

# Cache configuration for Hugging Face Spaces: point the Hugging Face and
# PyTorch cache locations at directories under /tmp. This runs before the
# kokoro / torch imports below, so the variables are already set when those
# libraries are loaded.
os.environ.update({
    'HF_HOME': '/tmp/hf_cache',
    'TRANSFORMERS_CACHE': '/tmp/hf_cache',
    'HF_HUB_CACHE': '/tmp/hf_cache',
    'TORCH_HOME': '/tmp/torch_cache',
})

# Ensure both cache directories exist; an already-existing directory is fine.
for _cache_dir in ('/tmp/hf_cache', '/tmp/torch_cache'):
    os.makedirs(_cache_dir, exist_ok=True)

from kokoro import KPipeline
import soundfile as sf
import torch

# Build the Kokoro pipeline from the 'hexgrad/Kokoro-82M' repo with
# lang_code 'a'.
pipeline = KPipeline(repo_id='hexgrad/Kokoro-82M', lang_code='a')

# Input passage to synthesize. The [word](/.../) spans carry an inline
# pronunciation for "Kokoro".
text = '''
[Kokoro](/kˈOkəɹO/) is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient. With Apache-licensed weights, [Kokoro](/kˈOkəɹO/) can be deployed anywhere from production environments to personal projects.
'''

# Calling the pipeline returns an iterable of segments, consumed by the loop
# below; each item unpacks into three values.
generator = pipeline(text, voice='af_heart')

# Walk the segments in order: log each one, then write it to its own WAV file
# in the current working directory.
for segment_no, segment in enumerate(generator):
    # NOTE(review): gs / ps are presumably the segment's graphemes (text) and
    # phonemes - confirm against the Kokoro documentation.
    graphemes, phonemes, waveform = segment
    print(f"Segment {segment_no}: gs={graphemes}, ps={phonemes}")

    # The file is named after the zero-based segment index; 24000 is passed
    # to soundfile as the sample rate.
    wav_path = f'{segment_no}.wav'
    sf.write(wav_path, waveform, 24000)
    print(f"Saved segment {segment_no} as {wav_path}")

print("Speech generation completed!")