# Spaces: Sleeping
import os

# Cache locations used by this script on Hugging Face Spaces.
# The environment variables are assigned before the third-party imports that
# follow, preserving the original statement order.
# NOTE(review): /tmp is presumably used because it is writable inside the
# Space container -- confirm against the deployment.
HF_CACHE_DIR = '/tmp/hf_cache'
TORCH_CACHE_DIR = '/tmp/torch_cache'

# Configure cache directories for Hugging Face Spaces
os.environ['HF_HOME'] = HF_CACHE_DIR
# NOTE(review): newer transformers releases appear to deprecate
# TRANSFORMERS_CACHE in favour of HF_HOME; it is still set here so the
# behaviour matches the original script.
os.environ['TRANSFORMERS_CACHE'] = HF_CACHE_DIR
os.environ['HF_HUB_CACHE'] = HF_CACHE_DIR
os.environ['TORCH_HOME'] = TORCH_CACHE_DIR

# Create cache directories (no error if they already exist)
os.makedirs(HF_CACHE_DIR, exist_ok=True)
os.makedirs(TORCH_CACHE_DIR, exist_ok=True)
from kokoro import KPipeline
import soundfile as sf
import torch

# Sample rate (Hz) passed to soundfile when writing each segment.
# NOTE(review): 24000 is presumably Kokoro's native output rate -- confirm
# against the Kokoro documentation before changing it.
SAMPLE_RATE = 24000

# Initialize Kokoro pipeline
# NOTE(review): lang_code='a' presumably selects American English -- confirm.
pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')

# Text to convert to speech
text = '''
[Kokoro](/kˈOkəɹO/) is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient. With Apache-licensed weights, [Kokoro](/kˈOkəɹO/) can be deployed anywhere from production environments to personal projects.
'''

# Generate speech using Kokoro
generator = pipeline(text, voice='af_heart')

# Process and save the generated audio.
# Each item yielded by the pipeline unpacks into three values; `audio` is what
# gets written to disk.
# NOTE(review): gs/ps are presumably the segment's graphemes and phonemes --
# confirm against the Kokoro documentation.
for i, (gs, ps, audio) in enumerate(generator):
    print(f"Segment {i}: gs={gs}, ps={ps}")
    # Save each segment as a separate file, named after its index
    sf.write(f'{i}.wav', audio, SAMPLE_RATE)
    print(f"Saved segment {i} as {i}.wav")

print("Speech generation completed!")