Spaces:

diabolic6045
/

tts-api

Sleeping

App Files Files Community

Divax committed on Jun 2

Commit

94fd4b0

1 Parent(s): 71905d8

test

Browse files

Files changed (16) hide show

Dockerfile +13 -19
Dockerfile.coqui +0 -51
README.md +64 -288
README_coqui.md +0 -351
app.py +0 -414
app_config.py +0 -54
client_example.py +0 -269
requirements.txt +8 -13
requirements_coqui.txt +0 -12
start_c3po_api.py +17 -136
startup.py +0 -120
test.py +0 -144
test_build.py +69 -0
test_coqui_api.py +0 -146
test_coqui_tts.py +0 -99
test_kokoro_install.py +0 -86

Dockerfile CHANGED Viewed

@@ -1,13 +1,12 @@
-FROM python:3.11
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
-# Install system dependencies as root
 RUN apt-get update && apt-get install -y \
     git \
     git-lfs \
-    espeak-ng \
     ffmpeg \
     && rm -rf /var/lib/apt/lists/*
@@ -17,35 +16,30 @@ RUN git lfs install
 # Switch to the "user" user
 USER user
-# Set home to the user's home directory
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
     COQUI_TOS_AGREED=1 \
-    NUMBA_DISABLE_JIT=1 \
-    FORCE_CPU=true \
-    CUDA_VISIBLE_DEVICES=""
-# Set the working directory to the user's home directory
 WORKDIR $HOME/app
-# Try and run pip command after setting the user with `USER user` to avoid permission issues with Python
 RUN pip install --no-cache-dir --upgrade pip
-# Copy requirements first and install dependencies
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Download unidic for mecab (required for some TTS features)
-RUN python -m unidic download
-# Clone the C3PO XTTS model
-RUN git clone https://huggingface.co/Borcherding/XTTS-v2_C3PO XTTS-v2_C3PO
-# Copy the current directory contents into the container at $HOME/app setting the owner to the user
-COPY --chown=user . $HOME/app
 # Expose the port
 EXPOSE 7860
-# Start the API directly
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+FROM python:3.11-slim
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
+# Install only essential system dependencies
 RUN apt-get update && apt-get install -y \
     git \
     git-lfs \
     ffmpeg \
     && rm -rf /var/lib/apt/lists/*
 # Switch to the "user" user
 USER user
+# Set environment variables
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
     COQUI_TOS_AGREED=1 \
+    HF_HUB_DISABLE_TELEMETRY=1
+# Set the working directory
 WORKDIR $HOME/app
+# Upgrade pip
 RUN pip install --no-cache-dir --upgrade pip
+# Copy and install requirements
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# Pre-download the C-3PO model to speed up startup
+RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='Borcherding/XTTS-v2_C3PO', local_dir='./models/XTTS-v2_C3PO', local_dir_use_symlinks=False)"
+# Copy the API file
+COPY --chown=user coqui_api.py .
 # Expose the port
 EXPOSE 7860
+# Start the C-3PO TTS API
+CMD ["uvicorn", "coqui_api:app", "--host", "0.0.0.0", "--port", "7860"]

Dockerfile.coqui DELETED Viewed

@@ -1,51 +0,0 @@
-FROM python:3.11
-# Set up a new user named "user" with user ID 1000
-RUN useradd -m -u 1000 user
-# Install system dependencies as root
-RUN apt-get update && apt-get install -y \
-    git \
-    git-lfs \
-    espeak-ng \
-    ffmpeg \
-    libsndfile1 \
-    && rm -rf /var/lib/apt/lists/*
-# Initialize git lfs
-RUN git lfs install
-# Switch to the "user" user
-USER user
-# Set home to the user's home directory
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    COQUI_TOS_AGREED=1 \
-    HF_HUB_DISABLE_TELEMETRY=1 \
-    HF_HOME=/home/user/.cache/huggingface
-# Set the working directory to the user's home directory
-WORKDIR $HOME/app
-# Upgrade pip
-RUN pip install --no-cache-dir --upgrade pip
-# Install PyTorch with CPU support for Hugging Face Spaces
-RUN pip install --no-cache-dir torch torchaudio --index-url https://download.pytorch.org/whl/cpu
-# Copy requirements and install dependencies
-COPY --chown=user requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-# Copy the API file
-COPY --chown=user coqui_api.py .
-# Create necessary directories
-RUN mkdir -p $HOME/.cache $HOME/app/models
-# Expose the port
-EXPOSE 7860
-# Start the Coqui TTS API
-CMD ["uvicorn", "coqui_api:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,334 +1,110 @@
----
-title: XTTS C3PO Voice Cloning API
-emoji: 🤖
-colorFrom: indigo
-colorTo: yellow
-sdk: docker
-pinned: false
----
-# XTTS C3PO Voice Cloning API
-A FastAPI-based Text-to-Speech API using XTTS-v2 with the iconic C3PO voice from Star Wars.
-## Features
-- **C3PO Voice**: Pre-loaded with the iconic C3PO voice from Star Wars
-- **Custom Voice Cloning**: Upload your own reference audio for voice cloning
-- **Multilingual Support**: 16+ languages with C3PO voice
-- **No Upload Required**: Use C3PO voice without any file uploads
-- **RESTful API**: Clean API with automatic documentation
-- **Docker Support**: Optimized for Hugging Face Spaces deployment
-- **PyTorch 2.6 Compatible**: Includes compatibility fixes
-## About the C3PO Model
-This API uses the XTTS-v2 C3PO model from [Borcherding/XTTS-v2_C3PO](https://huggingface.co/Borcherding/XTTS-v2_C3PO), which provides the iconic voice of C-3PO from Star Wars. The model supports:
-- High-quality C3PO voice synthesis
-- Multilingual C3PO speech (16+ languages)
-- Custom voice cloning capabilities
-- Real-time speech generation
-## Quick Start
-### Using C3PO Voice (No Upload Required)
 ```bash
-curl -X POST "http://localhost:7860/tts-c3po" \
-  -F "text=Hello there! I am C-3PO, human-cyborg relations." \
-  -F "language=en" \
-  --output c3po_speech.wav
-```
-### Using Custom Voice Cloning
-```bash
-curl -X POST "http://localhost:7860/tts" \
-  -F "text=This will be spoken in your custom voice!" \
-  -F "language=en" \
-  -F "speaker_file=@your_reference_voice.wav" \
-  --output custom_speech.wav
 ```
-## API Endpoints
-### C3PO Voice Only
-- **POST** `/tts-c3po` - Generate speech using C3PO voice (no file upload needed)
-  - **Parameters:**
-    - `text` (form): Text to convert to speech (max 500 characters)
-    - `language` (form): Language code (default: "en")
-    - `no_lang_auto_detect` (form): Disable automatic language detection
-### Voice Cloning with Fallback
-- **POST** `/tts` - Convert text to speech with optional custom voice
-  - **Parameters:**
-    - `text` (form): Text to convert to speech (max 500 characters)
-    - `language` (form): Language code (default: "en")
-    - `voice_cleanup` (form): Apply audio cleanup to reference voice
-    - `no_lang_auto_detect` (form): Disable automatic language detection
-    - `speaker_file` (file, optional): Reference speaker audio file (uses C3PO if not provided)
-### JSON API
-- **POST** `/tts-json` - Convert text to speech using JSON request body
-  - **Body:** JSON object with `text`, `language`, `voice_cleanup`, `no_lang_auto_detect`
-  - **File:** `speaker_file` (optional) - Reference speaker audio file
-### Information Endpoints
-- **GET** `/health` - Check API status, device info, and supported languages
-- **GET** `/languages` - Get list of supported languages
-- **GET** `/docs` - Interactive API documentation (Swagger UI)
-## Usage Examples
-### Python - C3PO Voice
-```python
-import requests
-# Generate C3PO speech
-url = "http://localhost:7860/tts-c3po"
-data = {
-    "text": "Hello there! I am C-3PO, human-cyborg relations.",
-    "language": "en"
-}
-response = requests.post(url, data=data)
-if response.status_code == 200:
-    with open("c3po_speech.wav", "wb") as f:
-        f.write(response.content)
-    print("C3PO speech generated!")
-```
-### Python - Custom Voice with C3PO Fallback
-```python
-import requests
-url = "http://localhost:7860/tts"
-data = {
-    "text": "This will use C3PO voice if no speaker file is provided.",
-    "language": "en"
-}
-# No speaker_file provided - will use C3PO voice
-response = requests.post(url, data=data)
-if response.status_code == 200:
-    with open("speech_output.wav", "wb") as f:
-        f.write(response.content)
-```
-### Multilingual C3PO
-```python
-# C3PO speaking Spanish
-data = {
-    "text": "Hola, soy C-3PO. Domino más de seis millones de formas de comunicación.",
-    "language": "es"
-}
-response = requests.post("http://localhost:7860/tts-c3po", data=data)
-```
-## Supported Languages
-The C3PO model supports all XTTS-v2 languages:
-- **en** - English
-- **es** - Spanish
-- **fr** - French
-- **de** - German
-- **it** - Italian
-- **pt** - Portuguese (Brazilian)
-- **pl** - Polish
-- **tr** - Turkish
-- **ru** - Russian
-- **nl** - Dutch
-- **cs** - Czech
-- **ar** - Arabic
-- **zh-cn** - Mandarin Chinese
-- **ja** - Japanese
-- **ko** - Korean
-- **hu** - Hungarian
-- **hi** - Hindi
-## Setup
-### CPU-Only Installation (Recommended for most users)
-For CPU-only usage (no GPU required):
-```bash
-# Ubuntu/Debian
-sudo apt-get install espeak-ng ffmpeg git git-lfs
-# macOS
-brew install espeak ffmpeg git git-lfs
-```
-2. **Install CPU-only PyTorch and dependencies:**
 ```bash
-# Option 1: Use the provided script
-chmod +x install_cpu.sh
-./install_cpu.sh
-# Option 2: Manual installation
-pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
 pip install -r requirements.txt
-python -m unidic download
-```
-3. **Set CPU-only environment variables:**
-```bash
-export FORCE_CPU=true
-export CUDA_VISIBLE_DEVICES=""
 ```
-4. **Run the API:**
-```bash
-uvicorn app:app --host 0.0.0.0 --port 7860
-```
-### Hugging Face Spaces Deployment
-This API is optimized for Hugging Face Spaces with:
-- Automatic C3PO model downloading
-- Proper user permissions (user ID 1000)
-- PyTorch 2.6 compatibility fixes
-- COQUI license agreement handling
-### Local Development
-1. **Install system dependencies:**
 ```bash
-# Ubuntu/Debian
-sudo apt-get install espeak-ng ffmpeg git git-lfs
-# macOS
-brew install espeak ffmpeg git git-lfs
 ```
-2. **Install Python dependencies:**
 ```bash
-pip install -r requirements.txt
-python -m unidic download
 ```
-3. **Clone C3PO model (optional - auto-downloaded on first run):**
 ```bash
-git clone https://huggingface.co/Borcherding/XTTS-v2_C3PO XTTS-v2_C3PO
 ```
-4. **Run the API:**
 ```bash
-uvicorn app:app --host 0.0.0.0 --port 7860
 ```
-### Using Docker
-```bash
-# Build and run
-docker build -t xtts-c3po-api .
-docker run -p 7860:7860 xtts-c3po-api
-```
-## Reference Audio Guidelines
-For custom voice cloning:
-1. **Duration**: 3-10 seconds of clear speech
-2. **Quality**: High-quality audio, minimal background noise
-3. **Format**: WAV format recommended (MP3, M4A also supported)
-4. **Content**: Natural speech, avoid music or effects
-5. **Speaker**: Single speaker, clear pronunciation
-## Model Information
-- **Base Model**: XTTS-v2
-- **Voice**: C3PO from Star Wars
-- **Source**: [Borcherding/XTTS-v2_C3PO](https://huggingface.co/Borcherding/XTTS-v2_C3PO)
-- **Languages**: 16+ supported
-- **License**: CPML (Coqui Public Model License)
-## Testing
-Run the test suite:
 ```bash
-# Test C3PO model functionality
-python test.py
-# Test API endpoints
-python client_example.py
-```
-## Environment Variables
-Automatically configured:
-- `COQUI_TOS_AGREED=1` - Agrees to CPML license
-- `NUMBA_DISABLE_JIT=1` - Disables Numba JIT compilation
-## API Response Examples
-### Health Check Response
-```json
-{
-  "status": "healthy",
-  "device": "cuda",
-  "model": "XTTS-v2 C3PO",
-  "default_voice": "C3PO",
-  "supported_languages": ["en", "es", "fr", ...]
-}
-```
-### Languages Response
-```json
-{
-  "languages": ["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi"]
-}
 ```
-## Troubleshooting
-### CPU Performance
-When running on CPU:
-- Speech generation will be slower than GPU (30-60 seconds vs 3-5 seconds)
-- Memory usage is lower (2-4GB RAM vs 4-8GB VRAM)
-- No CUDA installation required
-- Works on any system with sufficient RAM
-### PyTorch Loading Issues
-The API includes fixes for PyTorch 2.6's `weights_only=True` default. If you encounter loading issues, ensure the compatibility fix is applied.
-### Model Download Issues
-If the C3PO model fails to download:
-1. Check internet connection
-2. Verify git and git-lfs are installed
-3. Manually clone: `git clone https://huggingface.co/Borcherding/XTTS-v2_C3PO XTTS-v2_C3PO`
-### Audio Quality Issues
-- Use high-quality reference audio for custom voices
-- Enable `voice_cleanup` for noisy reference audio
-- Ensure reference audio is 3-10 seconds long
-### Memory Issues
-- **CPU Mode**: Requires 2-4GB RAM, works on most modern computers
-- **GPU Mode**: Requires 4GB+ VRAM for optimal performance
-- Reduce text length for batch processing
-- Use CPU mode with `FORCE_CPU=true` environment variable
-### CPU-Only Installation Issues
-If you encounter GPU-related errors:
-1. Set environment variables: `export FORCE_CPU=true CUDA_VISIBLE_DEVICES=""`
-2. Install CPU-only PyTorch: `pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu`
-3. Restart the API after setting environment variables
-## License
-This project uses XTTS-v2 which is licensed under the Coqui Public Model License (CPML). The C3PO model is provided by the community. See https://coqui.ai/cpml for license details.
-## Credits
-- **XTTS-v2**: Coqui AI
-- **C3PO Model**: [Borcherding](https://huggingface.co/Borcherding)
-- **Original Character**: C-3PO from Star Wars (Lucasfilm/Disney)

+# 🤖 C-3PO TTS API
+A FastAPI-based text-to-speech service using the **C-3PO fine-tuned XTTS v2 model** from [Borcherding/XTTS-v2_C3PO](https://huggingface.co/Borcherding/XTTS-v2_C3PO) for authentic C-3PO voice synthesis.
+## ✨ Features
+- 🤖 **Authentic C-3PO Voice**: XTTS v2 model fine-tuned on 20 unique C-3PO voice lines
+- 🌍 **17+ Languages**: Multilingual support while maintaining C-3PO characteristics
+- 🎭 **Voice Cloning**: Optional custom voice cloning capabilities
+- 🚀 **FastAPI**: Modern API with automatic documentation
+- 🐳 **Docker Ready**: Containerized for easy deployment
+## 🚀 Quick Start
+### Docker Deployment
 ```bash
+# Build the container
+docker build -t c3po-tts .
+# Run the container
+docker run -p 7860:7860 c3po-tts
 ```
+### Local Development
 ```bash
+# Install dependencies
 pip install -r requirements.txt
+# Run the API
+python coqui_api.py
 ```
+The API will be available at `http://localhost:7860`
+## 📡 API Endpoints
+### C-3PO Text-to-Speech
 ```bash
+curl -X POST "http://localhost:7860/tts-c3po" \
+  -F "text=I am C-3PO, human-cyborg relations." \
+  -F "language=en" \
+  --output c3po_voice.wav
 ```
+### General Text-to-Speech (with C-3PO voice by default)
 ```bash
+curl -X POST "http://localhost:7860/tts" \
+  -F "text=The odds of successfully navigating an asteroid field are approximately 3,720 to 1." \
+  -F "language=en" \
+  --output c3po_output.wav
 ```
+### JSON API
 ```bash
+curl -X POST "http://localhost:7860/tts-json" \
+  -H "Content-Type: application/json" \
+  -d '{"text": "R2-D2, you know better than to trust a strange computer!", "language": "en"}' \
+  --output c3po_json.wav
 ```
+### Health Check
 ```bash
+curl http://localhost:7860/health
 ```
+## 🌍 Supported Languages
+English, Spanish, French, German, Italian, Portuguese, Polish, Turkish, Russian, Dutch, Czech, Arabic, Chinese, Japanese, Hungarian, Korean, Hindi
+## 🎨 Example C-3PO Phrases
+Perfect texts for demonstrating C-3PO's voice:
+- "I am C-3PO, human-cyborg relations."
+- "The odds of successfully navigating an asteroid field are approximately 3,720 to 1."
+- "R2-D2, you know better than to trust a strange computer!"
+- "Oh my! How interesting!"
+## 📖 API Documentation
+Visit `http://localhost:7860/docs` for interactive API documentation.
+## 🧪 Testing
 ```bash
+# Run the C-3PO test suite
+python test_c3po_model.py
 ```
+## 🔧 Configuration
+The Docker image pre-downloads the C-3PO model at build time; when run locally, the API automatically downloads the model on first run. Environment variables:
+- `COQUI_TOS_AGREED=1`: Accepts Coqui TTS terms
+- `HF_HUB_DISABLE_TELEMETRY=1`: Disables telemetry
+## 📦 Files
+- `coqui_api.py`: Main C-3PO TTS API
+- `test_c3po_model.py`: Test suite for C-3PO functionality
+- `start_c3po_api.py`: Startup script with dependency checks
+- `Dockerfile`: Container configuration
+- `requirements.txt`: Python dependencies
+## 🎭 Credits
+- [C-3PO Fine-tuned Model](https://huggingface.co/Borcherding/XTTS-v2_C3PO) by Borcherding
+- [Coqui TTS](https://github.com/coqui-ai/TTS) - The underlying TTS engine
+- [FastAPI](https://fastapi.tiangolo.com/) - Web framework

README_coqui.md DELETED Viewed

@@ -1,351 +0,0 @@
-# 🤖 Coqui TTS C-3PO API for Hugging Face Spaces
-A FastAPI-based text-to-speech service using the Coqui TTS library with the **C-3PO fine-tuned XTTS v2 model** from [Borcherding/XTTS-v2_C3PO](https://huggingface.co/Borcherding/XTTS-v2_C3PO) for authentic C-3PO voice synthesis.
-## ✨ Features
-- 🤖 **C-3PO Voice**: Authentic C-3PO voice using fine-tuned XTTS v2 model
-- 🎯 **Text-to-Speech**: Convert text to natural-sounding speech
-- 🎭 **Voice Cloning**: Clone any voice from a reference audio sample
-- 🌍 **Multilingual**: Support for 17+ languages with C-3PO voice characteristics
-- 🚀 **FastAPI**: Modern, fast API with automatic documentation
-- 🐳 **Docker Ready**: Containerized for easy deployment
-- ☁️ **Hugging Face Spaces**: Optimized for HF Spaces deployment
-## 🎭 C-3PO Model Information
-This API uses the fine-tuned C-3PO voice model from [Borcherding/XTTS-v2_C3PO](https://huggingface.co/Borcherding/XTTS-v2_C3PO), which features:
-- **Fine-tuned on 20 unique C-3PO voice lines** from Star Wars
-- **Multi-lingual support** (17 languages) while maintaining C-3PO's distinctive voice
-- **Emotion & Style Transfer** capturing C-3PO's formal, protocol droid characteristics
-- **High-Quality Audio** output at 24kHz sampling rate
-## 📡 API Endpoints
-### 1. Health Check
-```bash
-GET /health
-```
-Returns API status, model information, and C-3PO voice availability.
-### 2. List Models
-```bash
-GET /models
-```
-Returns available TTS models.
-### 3. C-3PO Text-to-Speech (Dedicated)
-```bash
-POST /tts-c3po
-```
-**Parameters:**
-- `text` (string): Text to convert to C-3PO voice (2-500 characters)
-- `language` (string): Language code (default: "en")
-**Example using curl:**
-```bash
-curl -X POST "http://localhost:7860/tts-c3po" \
-  -F "text=I am C-3PO, human-cyborg relations." \
-  -F "language=en" \
-  --output c3po_voice.wav
-```
-### 4. General Text-to-Speech
-```bash
-POST /tts
-```
-**Parameters:**
-- `text` (string): Text to convert to speech (2-500 characters)
-- `language` (string): Language code (default: "en")
-- `speaker_file` (file, optional): Reference audio for voice cloning
-- `use_c3po_voice` (boolean): Use C-3PO voice if no speaker file provided (default: true)
-**Example using curl:**
-```bash
-# C-3PO voice (default)
-curl -X POST "http://localhost:7860/tts" \
-  -F "text=The odds of successfully navigating an asteroid field are approximately 3,720 to 1." \
-  -F "language=en" \
-  --output c3po_output.wav
-# Custom voice cloning
-curl -X POST "http://localhost:7860/tts" \
-  -F "text=This will sound like the reference voice." \
-  -F "language=en" \
-  -F "speaker_file=@reference_voice.wav" \
-  -F "use_c3po_voice=false" \
-  --output cloned_voice.wav
-```
-### 5. JSON TTS (C-3PO Voice)
-```bash
-POST /tts-json
-```
-**JSON Body:**
-```json
-{
-  "text": "R2-D2, you know better than to trust a strange computer!",
-  "language": "en"
-}
-```
-## 🚀 Deployment on Hugging Face Spaces
-### Step 1: Create a new Space
-1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
-2. Click "Create new Space"
-3. Choose "Docker" as the SDK
-4. Set your space name and visibility
-### Step 2: Add files to your Space
-Upload these files to your Hugging Face Space repository:
-```
-your-space/
-├── coqui_api.py          # Main API file with C-3PO integration
-├── requirements.txt      # Dependencies (includes huggingface_hub)
-├── Dockerfile.coqui      # Docker configuration
-├── test_c3po_model.py    # Test script for C-3PO functionality
-└── README.md            # This file
-```
-### Step 3: Configure your Space
-Rename the files in your Space:
-- `Dockerfile.coqui` → `Dockerfile`
-### Step 4: Deploy
-Your Space will automatically build and deploy. The build process may take 15-20 minutes as it downloads the C-3PO fine-tuned model from Hugging Face.
-## 💻 Local Development
-### Requirements
-- Python 3.11+
-- PyTorch
-- Coqui TTS library
-- Hugging Face Hub
-### Installation
-```bash
-# Clone the repository
-git clone <your-repo>
-cd <your-repo>
-# Install dependencies
-pip install -r requirements.txt
-# Run the API
-python coqui_api.py
-```
-The API will be available at `http://localhost:7860`
-### Testing
-```bash
-# Run the C-3PO model test suite
-python test_c3po_model.py
-# Run the general test client
-python test_coqui_api.py
-```
-## 🎪 Usage Examples
-### Python Client - C-3PO Voice
-```python
-import requests
-# C-3PO voice synthesis
-data = {"text": "I am C-3PO, human-cyborg relations.", "language": "en"}
-response = requests.post("http://localhost:7860/tts-c3po", data=data)
-with open("c3po_output.wav", "wb") as f:
-    f.write(response.content)
-# JSON API
-import json
-headers = {'Content-Type': 'application/json'}
-data = {"text": "The odds are approximately 3,720 to 1!", "language": "en"}
-response = requests.post("http://localhost:7860/tts-json", json=data, headers=headers)
-with open("c3po_json.wav", "wb") as f:
-    f.write(response.content)
-```
-### JavaScript/Web - C-3PO Voice
-```javascript
-// C-3PO voice synthesis
-const formData = new FormData();
-formData.append('text', 'Oh my! How interesting!');
-formData.append('language', 'en');
-fetch('http://localhost:7860/tts-c3po', {
-    method: 'POST',
-    body: formData
-})
-.then(response => response.blob())
-.then(blob => {
-    const url = URL.createObjectURL(blob);
-    const audio = new Audio(url);
-    audio.play();
-});
-// JSON API
-fetch('http://localhost:7860/tts-json', {
-    method: 'POST',
-    headers: {'Content-Type': 'application/json'},
-    body: JSON.stringify({
-        text: 'R2-D2, you know better than to trust a strange computer!',
-        language: 'en'
-    })
-})
-.then(response => response.blob())
-.then(blob => {
-    const url = URL.createObjectURL(blob);
-    const audio = new Audio(url);
-    audio.play();
-});
-```
-## 🎨 C-3PO Voice Examples
-Perfect texts for demonstrating C-3PO's voice characteristics:
-```bash
-# Classic C-3PO phrases
-curl -X POST "http://localhost:7860/tts-c3po" \
-  -F "text=I am C-3PO, human-cyborg relations." \
-  -F "language=en" --output c3po_intro.wav
-curl -X POST "http://localhost:7860/tts-c3po" \
-  -F "text=The odds of successfully navigating an asteroid field are approximately 3,720 to 1." \
-  -F "language=en" --output c3po_odds.wav
-curl -X POST "http://localhost:7860/tts-c3po" \
-  -F "text=R2-D2, you know better than to trust a strange computer!" \
-  -F "language=en" --output c3po_r2d2.wav
-curl -X POST "http://localhost:7860/tts-c3po" \
-  -F "text=Oh my! How interesting!" \
-  -F "language=en" --output c3po_oh_my.wav
-```
-## 🌍 Multilingual C-3PO Support
-The C-3PO model maintains its distinctive voice characteristics across multiple languages:
-```python
-# Multilingual examples
-languages = [
-    ("Hello, I am C-3PO", "en"),
-    ("Hola, soy C-3PO", "es"),
-    ("Bonjour, je suis C-3PO", "fr"),
-    ("Guten Tag, ich bin C-3PO", "de"),
-    ("Ciao, sono C-3PO", "it"),
-    ("Olá, eu sou C-3PO", "pt")
-]
-for text, lang in languages:
-    response = requests.post("http://localhost:7860/tts-c3po",
-                           data={"text": text, "language": lang})
-    with open(f"c3po_{lang}.wav", "wb") as f:
-        f.write(response.content)
-```
-## 🔧 Voice Cloning Guide
-1. **Prepare Reference Audio:**
-   - Duration: 5-10 seconds (optimal)
-   - Format: WAV, MP3, or M4A
-   - Quality: Clear speech, minimal background noise
-   - Content: Natural speaking, preferably in target language
-2. **API Request:**
-   ```bash
-   curl -X POST "http://your-space.hf.space/tts" \
-     -F "text=Your text to synthesize" \
-     -F "language=en" \
-     -F "speaker_file=@your_reference.wav" \
-     --output result.wav
-   ```
-3. **Tips for Best Results:**
-   - Use high-quality reference audio
-   - Match the language of reference and target text
-   - Keep text length reasonable (under 500 characters)
-   - Experiment with different reference samples
-## Supported Languages
-The XTTS v2 model supports multiple languages including:
-- English (en)
-- Spanish (es)
-- French (fr)
-- German (de)
-- Italian (it)
-- Portuguese (pt)
-- Polish (pl)
-- Turkish (tr)
-- Russian (ru)
-- Dutch (nl)
-- Czech (cs)
-- Arabic (ar)
-- Chinese (zh-cn)
-- Japanese (ja)
-- Hungarian (hu)
-- Korean (ko)
-## Troubleshooting
-### Common Issues
-1. **Model Download Errors:**
-   - The first run downloads ~1.7GB model files
-   - Ensure stable internet connection
-   - Check Hugging Face Spaces logs
-2. **Audio Quality Issues:**
-   - Use high-quality reference audio for voice cloning
-   - Ensure reference audio matches target language
-   - Try different reference samples
-3. **Memory Issues on HF Spaces:**
-   - The model requires significant memory
-   - Consider upgrading to a higher-tier Space if needed
-4. **API Timeouts:**
-   - Initial model loading takes time
-   - Subsequent requests are faster
-   - Consider warming up the model with a test request
-### Environment Variables
-- `COQUI_TOS_AGREED=1`: Accepts Coqui TTS terms of service
-- `HF_HUB_DISABLE_TELEMETRY=1`: Disables telemetry
-- `TORCH_HOME`: PyTorch cache directory
-## API Documentation
-Once deployed, visit your Space URL and add `/docs` to access the interactive API documentation:
-```
-https://your-username-your-space-name.hf.space/docs
-```
-## Contributing
-1. Fork the repository
-2. Create a feature branch
-3. Make your changes
-4. Test thoroughly
-5. Submit a pull request
-## License
-This project uses the Coqui TTS library. Please check [Coqui TTS license](https://github.com/coqui-ai/TTS) for usage terms.
-## Credits
-- [Coqui TTS](https://github.com/coqui-ai/TTS) - The underlying TTS engine
-- [XTTS v2](https://arxiv.org/abs/2309.11321) - The voice cloning model
-- [FastAPI](https://fastapi.tiangolo.com/) - Web framework
-- [Hugging Face Spaces](https://huggingface.co/spaces) - Deployment platform

app.py DELETED Viewed

@@ -1,414 +0,0 @@
-# Import configuration first to setup environment
-import app_config
-import os
-import sys
-import io
-import subprocess
-import uuid
-import time
-import torch
-import torchaudio
-import tempfile
-import logging
-from typing import Optional
-# Fix PyTorch weights_only issue for XTTS
-import torch.serialization
-from TTS.tts.configs.xtts_config import XttsConfig
-torch.serialization.add_safe_globals([XttsConfig])
-# Set environment variables
-os.environ["COQUI_TOS_AGREED"] = "1"
-os.environ["NUMBA_DISABLE_JIT"] = "1"
-# Force CPU usage if specified
-if os.environ.get("FORCE_CPU", "false").lower() == "true":
-    os.environ["CUDA_VISIBLE_DEVICES"] = ""
-from fastapi import FastAPI, HTTPException, UploadFile, File, Form
-from fastapi.responses import FileResponse
-from pydantic import BaseModel
-import langid
-from scipy.io.wavfile import write
-from pydub import AudioSegment
-from TTS.api import TTS
-from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
-from TTS.utils.generic_utils import get_user_data_dir
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-app = FastAPI(title="XTTS C3PO API", description="Text-to-Speech API using XTTS-v2 C3PO model", version="1.0.0")
-class TTSRequest(BaseModel):
-    text: str
-    language: str = "en"
-    voice_cleanup: bool = False
-    no_lang_auto_detect: bool = False
-class XTTSService:
-    def __init__(self):
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        logger.info(f"Using device: {self.device}")
-        # Use the C3PO model path
-        self.model_path = "XTTS-v2_C3PO/"
-        self.config_path = "XTTS-v2_C3PO/config.json"
-        # Check if model files exist, if not download them
-        if not os.path.exists(self.config_path):
-            logger.info("C3PO model not found locally, downloading...")
-            self._download_c3po_model()
-        # Load configuration
-        config = XttsConfig()
-        config.load_json(self.config_path)
-        # Initialize and load model
-        self.model = Xtts.init_from_config(config)
-        self.model.load_checkpoint(
-            config,
-            checkpoint_path=os.path.join(self.model_path, "model.pth"),
-            vocab_path=os.path.join(self.model_path, "vocab.json"),
-            eval=True,
-        )
-        if self.device == "cuda":
-            self.model.cuda()
-        self.supported_languages = config.languages
-        logger.info(f"XTTS C3PO model loaded successfully. Supported languages: {self.supported_languages}")
-        # Set default reference audio (C3PO voice)
-        self.default_reference = os.path.join(self.model_path, "reference.wav")
-        if not os.path.exists(self.default_reference):
-            # Look for any reference audio in the model directory
-            for file in os.listdir(self.model_path):
-                if file.endswith(('.wav', '.mp3', '.m4a')):
-                    self.default_reference = os.path.join(self.model_path, file)
-                    break
-            else:
-                self.default_reference = None
-        if self.default_reference:
-            logger.info(f"Default C3PO reference audio: {self.default_reference}")
-        else:
-            logger.warning("No default reference audio found in C3PO model directory")
-    def _download_c3po_model(self):
-        """Download the C3PO model from Hugging Face"""
-        try:
-            logger.info("Downloading C3PO model from Hugging Face...")
-            subprocess.run([
-                "git", "clone",
-                "https://huggingface.co/Borcherding/XTTS-v2_C3PO",
-                "XTTS-v2_C3PO"
-            ], check=True)
-            logger.info("C3PO model downloaded successfully")
-        except subprocess.CalledProcessError as e:
-            logger.error(f"Failed to download C3PO model: {e}")
-            raise HTTPException(status_code=500, detail="Failed to download C3PO model")
-    def generate_speech(self, text: str, speaker_wav_path: str = None, language: str = "en",
-                       voice_cleanup: bool = False, no_lang_auto_detect: bool = False) -> str:
-        """Generate speech and return the path to the output file"""
-        try:
-            # Use default C3PO voice if no speaker file provided
-            if speaker_wav_path is None:
-                if self.default_reference is None:
-                    raise HTTPException(status_code=400, detail="No reference audio available. Please upload a speaker file.")
-                speaker_wav_path = self.default_reference
-                logger.info("Using default C3PO voice")
-            # Validate language
-            if language not in self.supported_languages:
-                raise HTTPException(status_code=400, detail=f"Language '{language}' not supported. Supported: {self.supported_languages}")
-            # Language detection for longer texts
-            if len(text) > 15 and not no_lang_auto_detect:
-                language_predicted = langid.classify(text)[0].strip()
-                if language_predicted == "zh":
-                    language_predicted = "zh-cn"
-                if language_predicted != language:
-                    logger.warning(f"Detected language: {language_predicted}, chosen: {language}")
-            # Text length validation
-            if len(text) < 2:
-                raise HTTPException(status_code=400, detail="Text too short, please provide longer text")
-            if len(text) > 500:  # Increased limit for API
-                raise HTTPException(status_code=400, detail="Text too long, maximum 500 characters")
-            # Voice cleanup if requested
-            processed_speaker_wav = speaker_wav_path
-            if voice_cleanup:
-                processed_speaker_wav = self._cleanup_audio(speaker_wav_path)
-            # Generate conditioning latents
-            try:
-                gpt_cond_latent, speaker_embedding = self.model.get_conditioning_latents(
-                    audio_path=processed_speaker_wav,
-                    gpt_cond_len=30,
-                    gpt_cond_chunk_len=4,
-                    max_ref_length=60
-                )
-            except Exception as e:
-                logger.error(f"Speaker encoding error: {e}")
-                raise HTTPException(status_code=400, detail="Error processing reference audio. Please check the audio file.")
-            # Generate speech
-            logger.info("Generating speech...")
-            start_time = time.time()
-            out = self.model.inference(
-                text,
-                language,
-                gpt_cond_latent,
-                speaker_embedding,
-                repetition_penalty=5.0,
-                temperature=0.75,
-            )
-            inference_time = time.time() - start_time
-            logger.info(f"Speech generation completed in {inference_time:.2f} seconds")
-            # Save output
-            output_filename = f"xtts_c3po_output_{uuid.uuid4().hex}.wav"
-            output_path = os.path.join(tempfile.gettempdir(), output_filename)
-            torchaudio.save(output_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
-            return output_path
-        except Exception as e:
-            logger.error(f"Error generating speech: {e}")
-            if isinstance(e, HTTPException):
-                raise e
-            raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
-    def _cleanup_audio(self, audio_path: str) -> str:
-        """Apply audio cleanup filters"""
-        try:
-            output_path = audio_path + "_cleaned.wav"
-            # Basic audio cleanup using ffmpeg-python or similar
-            # For now, just return the original path
-            # You can implement more sophisticated cleanup here
-            return audio_path
-        except Exception as e:
-            logger.warning(f"Audio cleanup failed: {e}, using original audio")
-            return audio_path
-# Initialize XTTS service
-logger.info("Initializing XTTS C3PO service...")
-tts_service = XTTSService()
-@app.get("/")
-async def root():
-    return {"message": "XTTS C3PO API is running", "status": "healthy", "model": "C3PO"}
-@app.get("/health")
-async def health_check():
-    return {
-        "status": "healthy",
-        "device": tts_service.device,
-        "model": "XTTS-v2 C3PO",
-        "supported_languages": tts_service.supported_languages,
-        "default_voice": "C3PO" if tts_service.default_reference else "None"
-    }
-@app.get("/languages")
-async def get_languages():
-    """Get list of supported languages"""
-    return {"languages": tts_service.supported_languages}
-@app.post("/tts")
-async def text_to_speech(
-    text: str = Form(...),
-    language: str = Form("en"),
-    voice_cleanup: bool = Form(False),
-    no_lang_auto_detect: bool = Form(False),
-    speaker_file: UploadFile = File(None)
-):
-    """
-    Convert text to speech using XTTS C3PO voice cloning
-    - **text**: The text to convert to speech (max 500 characters)
-    - **language**: Language code (default: "en")
-    - **voice_cleanup**: Apply audio cleanup to reference voice
-    - **no_lang_auto_detect**: Disable automatic language detection
-    - **speaker_file**: Reference speaker audio file (optional, uses C3PO voice if not provided)
-    """
-    if not text.strip():
-        raise HTTPException(status_code=400, detail="Text cannot be empty")
-    speaker_temp_path = None
-    try:
-        # Handle speaker file if provided
-        if speaker_file is not None:
-            # Validate file type
-            if not speaker_file.content_type.startswith('audio/'):
-                raise HTTPException(status_code=400, detail="Speaker file must be an audio file")
-            # Save uploaded speaker file temporarily
-            speaker_temp_path = os.path.join(tempfile.gettempdir(), f"speaker_{uuid.uuid4().hex}.wav")
-            with open(speaker_temp_path, "wb") as buffer:
-                content = await speaker_file.read()
-                buffer.write(content)
-        # Generate speech (will use C3PO voice if no speaker file provided)
-        output_path = tts_service.generate_speech(
-            text,
-            speaker_temp_path,
-            language,
-            voice_cleanup,
-            no_lang_auto_detect
-        )
-        # Clean up temporary speaker file
-        if speaker_temp_path and os.path.exists(speaker_temp_path):
-            try:
-                os.remove(speaker_temp_path)
-            except:
-                pass
-        # Return the generated audio file
-        voice_type = "custom" if speaker_file else "c3po"
-        return FileResponse(
-            output_path,
-            media_type="audio/wav",
-            filename=f"xtts_{voice_type}_output_{uuid.uuid4().hex}.wav",
-            headers={"Content-Disposition": "attachment"}
-        )
-    except Exception as e:
-        # Clean up files in case of error
-        if speaker_temp_path and os.path.exists(speaker_temp_path):
-            try:
-                os.remove(speaker_temp_path)
-            except:
-                pass
-        logger.error(f"Error in TTS endpoint: {e}")
-        if isinstance(e, HTTPException):
-            raise e
-        raise HTTPException(status_code=500, detail=str(e))
-@app.post("/tts-json")
-async def text_to_speech_json(
-    request: TTSRequest,
-    speaker_file: UploadFile = File(None)
-):
-    """
-    Convert text to speech using JSON request body
-    - **request**: TTSRequest containing text, language, and options
-    - **speaker_file**: Reference speaker audio file (optional, uses C3PO voice if not provided)
-    """
-    if not request.text.strip():
-        raise HTTPException(status_code=400, detail="Text cannot be empty")
-    speaker_temp_path = None
-    try:
-        # Handle speaker file if provided
-        if speaker_file is not None:
-            # Validate file type
-            if not speaker_file.content_type.startswith('audio/'):
-                raise HTTPException(status_code=400, detail="Speaker file must be an audio file")
-            # Save uploaded speaker file temporarily
-            speaker_temp_path = os.path.join(tempfile.gettempdir(), f"speaker_{uuid.uuid4().hex}.wav")
-            with open(speaker_temp_path, "wb") as buffer:
-                content = await speaker_file.read()
-                buffer.write(content)
-        # Generate speech
-        output_path = tts_service.generate_speech(
-            request.text,
-            speaker_temp_path,
-            request.language,
-            request.voice_cleanup,
-            request.no_lang_auto_detect
-        )
-        # Clean up temporary speaker file
-        if speaker_temp_path and os.path.exists(speaker_temp_path):
-            try:
-                os.remove(speaker_temp_path)
-            except:
-                pass
-        # Return the generated audio file
-        voice_type = "custom" if speaker_file else "c3po"
-        return FileResponse(
-            output_path,
-            media_type="audio/wav",
-            filename=f"xtts_{voice_type}_{request.language}_{uuid.uuid4().hex}.wav",
-            headers={"Content-Disposition": "attachment"}
-        )
-    except Exception as e:
-        # Clean up files in case of error
-        if speaker_temp_path and os.path.exists(speaker_temp_path):
-            try:
-                os.remove(speaker_temp_path)
-            except:
-                pass
-        logger.error(f"Error in TTS JSON endpoint: {e}")
-        if isinstance(e, HTTPException):
-            raise e
-        raise HTTPException(status_code=500, detail=str(e))
-@app.post("/tts-c3po")
-async def text_to_speech_c3po_only(
-    text: str = Form(...),
-    language: str = Form("en"),
-    no_lang_auto_detect: bool = Form(False)
-):
-    """
-    Convert text to speech using C3PO voice only (no file upload needed)
-    - **text**: The text to convert to speech (max 500 characters)
-    - **language**: Language code (default: "en")
-    - **no_lang_auto_detect**: Disable automatic language detection
-    """
-    if not text.strip():
-        raise HTTPException(status_code=400, detail="Text cannot be empty")
-    try:
-        # Generate speech using C3PO voice
-        output_path = tts_service.generate_speech(
-            text,
-            None,  # Use default C3PO voice
-            language,
-            False,  # No voice cleanup needed for default voice
-            no_lang_auto_detect
-        )
-        # Return the generated audio file
-        return FileResponse(
-            output_path,
-            media_type="audio/wav",
-            filename=f"c3po_voice_{uuid.uuid4().hex}.wav",
-            headers={"Content-Disposition": "attachment"}
-        )
-    except Exception as e:
-        logger.error(f"Error in C3PO TTS endpoint: {e}")
-        if isinstance(e, HTTPException):
-            raise e
-        raise HTTPException(status_code=500, detail=str(e))

app_config.py DELETED Viewed

@@ -1,54 +0,0 @@
-"""
-Configuration for Kokoro TTS API, especially for Hugging Face Spaces deployment.
-"""
-import os
-import tempfile
-import logging
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-def setup_hf_cache():
-    """Setup cache environment variables for Hugging Face Spaces"""
-    # Use user's home directory for cache
-    home_dir = os.path.expanduser("~")
-    cache_dir = os.path.join(home_dir, ".cache")
-    cache_settings = {
-        'HF_HOME': cache_dir,
-        'TRANSFORMERS_CACHE': cache_dir,
-        'HF_HUB_CACHE': cache_dir,
-        'TORCH_HOME': cache_dir,
-        'NUMBA_CACHE_DIR': os.path.join(cache_dir, 'numba'),
-        'NUMBA_DISABLE_JIT': '1',
-        'HF_HUB_DISABLE_TELEMETRY': '1'
-    }
-    # Set environment variables
-    for key, value in cache_settings.items():
-        os.environ[key] = value
-        logger.info(f"Set {key} to {value}")
-    # Create cache directories
-    cache_dirs = [cache_dir, os.path.join(cache_dir, 'numba')]
-    for cache_path in cache_dirs:
-        try:
-            os.makedirs(cache_path, exist_ok=True)
-            logger.info(f"Created cache directory: {cache_path}")
-        except Exception as e:
-            logger.warning(f"Could not create {cache_path}: {e}")
-    logger.info("Cache environment setup completed")
-def get_temp_dir():
-    """Get a writable temporary directory"""
-    return tempfile.gettempdir()
-def is_hf_spaces():
-    """Check if running on Hugging Face Spaces"""
-    return os.environ.get('SPACE_ID') is not None
-# Initialize cache setup
-setup_hf_cache()

client_example.py DELETED Viewed

@@ -1,269 +0,0 @@
-import requests
-import os
-def test_c3po_voice():
-    """Test the C3PO voice without uploading any files"""
-    # API endpoint for C3PO voice only
-    url = "http://localhost:7860/tts-c3po"
-    # Text to convert to speech
-    text = "Hello there! I am C-3PO, human-cyborg relations. How may I assist you today?"
-    # Prepare the request data
-    data = {
-        "text": text,
-        "language": "en",
-        "no_lang_auto_detect": False
-    }
-    try:
-        print("Testing C3PO voice...")
-        print(f"Text: {text}")
-        response = requests.post(url, data=data)
-        if response.status_code == 200:
-            # Save the generated audio
-            output_filename = "c3po_voice_sample.wav"
-            with open(output_filename, "wb") as f:
-                f.write(response.content)
-            print(f"Success! C3PO voice sample saved as {output_filename}")
-        else:
-            print(f"Error: {response.status_code}")
-            print(response.text)
-    except requests.exceptions.ConnectionError:
-        print("Error: Could not connect to the API. Make sure the server is running on http://localhost:7860")
-    except Exception as e:
-        print(f"Error: {e}")
-def test_xtts_with_custom_voice():
-    """Example of using XTTS with custom voice upload"""
-    # API endpoint
-    url = "http://localhost:7860/tts"
-    # Text to convert to speech
-    text = "This is a test of XTTS voice cloning with a custom reference voice."
-    # Path to your speaker reference audio file
-    speaker_file_path = "reference.wav"  # Update this path to your reference audio
-    # Check if speaker file exists
-    if not os.path.exists(speaker_file_path):
-        print(f"Custom voice test skipped: Speaker file not found at {speaker_file_path}")
-        print("To test custom voice cloning:")
-        print("1. Record 3-10 seconds of clear speech")
-        print("2. Save as 'reference.wav' in this directory")
-        print("3. Run this test again")
-        return
-    # Prepare the request data
-    data = {
-        "text": text,
-        "language": "en",
-        "voice_cleanup": False,
-        "no_lang_auto_detect": False
-    }
-    files = {
-        "speaker_file": open(speaker_file_path, "rb")
-    }
-    try:
-        print("Testing XTTS with custom voice...")
-        print(f"Text: {text}")
-        print(f"Speaker file: {speaker_file_path}")
-        response = requests.post(url, data=data, files=files)
-        if response.status_code == 200:
-            # Save the generated audio
-            output_filename = "custom_voice_clone.wav"
-            with open(output_filename, "wb") as f:
-                f.write(response.content)
-            print(f"Success! Custom voice clone saved as {output_filename}")
-        else:
-            print(f"Error: {response.status_code}")
-            print(response.text)
-    except requests.exceptions.ConnectionError:
-        print("Error: Could not connect to the API. Make sure the server is running on http://localhost:7860")
-    except Exception as e:
-        print(f"Error: {e}")
-    finally:
-        files["speaker_file"].close()
-def test_xtts_fallback_to_c3po():
-    """Test XTTS endpoint without speaker file (should use C3PO voice)"""
-    # API endpoint
-    url = "http://localhost:7860/tts"
-    # Text to convert to speech
-    text = "When no custom voice is provided, I will speak in the C3PO voice by default."
-    # Prepare the request data (no speaker file)
-    data = {
-        "text": text,
-        "language": "en",
-        "voice_cleanup": False,
-        "no_lang_auto_detect": False
-    }
-    try:
-        print("Testing XTTS fallback to C3PO voice...")
-        print(f"Text: {text}")
-        response = requests.post(url, data=data)
-        if response.status_code == 200:
-            # Save the generated audio
-            output_filename = "xtts_c3po_fallback.wav"
-            with open(output_filename, "wb") as f:
-                f.write(response.content)
-            print(f"Success! XTTS with C3PO fallback saved as {output_filename}")
-        else:
-            print(f"Error: {response.status_code}")
-            print(response.text)
-    except requests.exceptions.ConnectionError:
-        print("Error: Could not connect to the API. Make sure the server is running on http://localhost:7860")
-    except Exception as e:
-        print(f"Error: {e}")
-def test_multilingual_c3po():
-    """Test C3PO voice in different languages"""
-    # API endpoint for C3PO voice only
-    url = "http://localhost:7860/tts-c3po"
-    # Test different languages
-    test_cases = [
-        ("en", "Hello, I am C-3PO. I am fluent in over six million forms of communication."),
-        ("es", "Hola, soy C-3PO. Domino más de seis millones de formas de comunicación."),
-        ("fr", "Bonjour, je suis C-3PO. Je maîtrise plus de six millions de formes de communication."),
-        ("de", "Hallo, ich bin C-3PO. Ich beherrsche über sechs Millionen Kommunikationsformen."),
-    ]
-    for language, text in test_cases:
-        data = {
-            "text": text,
-            "language": language,
-            "no_lang_auto_detect": True  # Force the specified language
-        }
-        try:
-            print(f"Testing C3PO voice in {language.upper()}...")
-            print(f"Text: {text}")
-            response = requests.post(url, data=data)
-            if response.status_code == 200:
-                # Save the generated audio
-                output_filename = f"c3po_voice_{language}.wav"
-                with open(output_filename, "wb") as f:
-                    f.write(response.content)
-                print(f"Success! C3PO {language} voice saved as {output_filename}")
-            else:
-                print(f"Error: {response.status_code}")
-                print(response.text)
-        except requests.exceptions.ConnectionError:
-            print("Error: Could not connect to the API. Make sure the server is running on http://localhost:7860")
-        except Exception as e:
-            print(f"Error: {e}")
-        print()  # Add spacing between tests
-def get_supported_languages():
-    """Get list of supported languages"""
-    try:
-        response = requests.get("http://localhost:7860/languages")
-        if response.status_code == 200:
-            languages = response.json()
-            print("Supported languages:", languages["languages"])
-            return languages["languages"]
-        else:
-            print("Failed to get languages:", response.status_code)
-            return []
-    except requests.exceptions.ConnectionError:
-        print("API is not running. Start it with: uvicorn app:app --host 0.0.0.0 --port 7860")
-        return []
-def check_api_health():
-    """Check if the API is running"""
-    try:
-        response = requests.get("http://localhost:7860/health")
-        if response.status_code == 200:
-            health_info = response.json()
-            print("API Health Check:")
-            print(f"  Status: {health_info['status']}")
-            print(f"  Device: {health_info['device']}")
-            print(f"  Model: {health_info['model']}")
-            print(f"  Default Voice: {health_info['default_voice']}")
-            print(f"  Languages: {len(health_info['supported_languages'])} supported")
-            return True
-        else:
-            print("API health check failed:", response.status_code)
-            return False
-    except requests.exceptions.ConnectionError:
-        print("API is not running. Start it with: uvicorn app:app --host 0.0.0.0 --port 7860")
-        return False
-def create_sample_reference():
-    """Instructions for creating a reference audio file"""
-    print("\n" + "="*50)
-    print("REFERENCE AUDIO SETUP")
-    print("="*50)
-    print("To use XTTS voice cloning, you need a reference audio file:")
-    print("1. Record 3-10 seconds of clear speech")
-    print("2. Save as WAV format (recommended)")
-    print("3. Ensure good audio quality (no background noise)")
-    print("4. Place the file in the same directory as this script")
-    print("5. Update the 'speaker_file_path' variable in the functions above")
-    print("\nExample recording text:")
-    print("'Hello, this is my voice. I'm recording this sample for voice cloning.'")
-    print("="*50)
-if __name__ == "__main__":
-    print("XTTS C3PO API Client Example")
-    print("=" * 40)
-    # First check if API is running
-    if check_api_health():
-        print()
-        # Get supported languages
-        languages = get_supported_languages()
-        print()
-        # Test C3PO voice (no file upload needed)
-        print("1. Testing C3PO voice (no upload required)...")
-        test_c3po_voice()
-        print()
-        # Test XTTS fallback to C3PO
-        print("2. Testing XTTS endpoint without speaker file (C3PO fallback)...")
-        test_xtts_fallback_to_c3po()
-        print()
-        # Test custom voice if reference file exists
-        print("3. Testing custom voice cloning...")
-        test_xtts_with_custom_voice()
-        print()
-        # Test multilingual C3PO
-        print("4. Testing multilingual C3PO voice...")
-        test_multilingual_c3po()
-        print("All tests completed!")
-        print("\nGenerated files:")
-        for file in os.listdir("."):
-            if file.endswith(".wav") and ("c3po" in file or "custom" in file or "xtts" in file):
-                print(f"  - {file}")
-    else:
-        print("\nPlease start the API server first:")
-        print("uvicorn app:app --host 0.0.0.0 --port 7860")

requirements.txt CHANGED Viewed

@@ -1,13 +1,8 @@
-SpeechRecognition>=3.8.1
-gtts>=2.3.2
-openai-whisper>=20240930
-pygame>=2.5.2
-anyascii>=0.3.0
-einops>=0.6.0
-encodec>=0.1.1
-inflect>=5.6.0
-num2words>=0.5.14
-pysbd>=0.3.4
-tqdm>=4.64.1
-coqui-tts == 0.26.2
-huggingface_hub>=0.17.0

+fastapi>=0.104.1
+uvicorn>=0.24.0
+python-multipart>=0.0.6
+torch>=2.0.0
+torchaudio>=2.0.0
+coqui-tts>=0.22.0
+huggingface_hub>=0.17.0
+pydantic>=2.0.0

requirements_coqui.txt DELETED Viewed

@@ -1,12 +0,0 @@
-fastapi>=0.104.1
-uvicorn[standard]>=0.24.0
-python-multipart>=0.0.6
-coqui-tts==0.26.2
-torch>=2.0.0
-torchaudio>=2.0.0
-numpy>=1.24.0
-scipy>=1.11.0
-pydub>=0.25.1
-librosa>=0.10.0
-soundfile>=0.12.1
-typing-extensions>=4.8.0

start_c3po_api.py CHANGED Viewed

@@ -1,171 +1,52 @@
 #!/usr/bin/env python3
 """
-Startup script for C-3PO TTS API
-Handles model download, initialization, and server startup
 """
 import os
 import sys
-import subprocess
 import logging
-import time
-from pathlib import Path
 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
 logger = logging.getLogger(__name__)
-def check_dependencies():
-    """Check if all required dependencies are installed"""
-    logger.info("🔍 Checking dependencies...")
-    try:
-        import torch
-        import TTS
-        import fastapi
-        import huggingface_hub
-        logger.info("✅ All core dependencies found")
-        return True
-    except ImportError as e:
-        logger.error(f"❌ Missing dependency: {e}")
-        logger.info("💡 Install with: pip install -r requirements.txt")
-        return False
-def check_gpu():
-    """Check GPU availability"""
-    try:
-        import torch
-        if torch.cuda.is_available():
-            gpu_name = torch.cuda.get_device_name(0)
-            logger.info(f"🎮 GPU available: {gpu_name}")
-            return True
-        else:
-            logger.info("💻 No GPU available, using CPU")
-            return False
-    except Exception as e:
-        logger.warning(f"⚠️  GPU check failed: {e}")
-        return False
-def check_disk_space():
-    """Check available disk space for model download"""
-    try:
-        import shutil
-        free_space = shutil.disk_usage('.').free / (1024**3)  # GB
-        if free_space < 5:
-            logger.warning(f"⚠️  Low disk space: {free_space:.1f}GB available")
-            logger.warning("💽 C-3PO model requires ~2GB space")
-        else:
-            logger.info(f"💾 Disk space: {free_space:.1f}GB available")
-        return free_space > 2
-    except Exception as e:
-        logger.warning(f"⚠️  Disk space check failed: {e}")
-        return True
 def setup_environment():
-    """Set up environment variables"""
     os.environ["COQUI_TOS_AGREED"] = "1"
     os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
-    # Create models directory
-    models_dir = Path("./models")
-    models_dir.mkdir(exist_ok=True)
     logger.info("🌍 Environment configured")
-def install_dependencies():
-    """Install missing dependencies"""
-    logger.info("📦 Installing dependencies...")
-    try:
-        subprocess.check_call([
-            sys.executable, "-m", "pip", "install", "-r", "requirements.txt"
-        ])
-        logger.info("✅ Dependencies installed successfully")
-        return True
-    except subprocess.CalledProcessError as e:
-        logger.error(f"❌ Failed to install dependencies: {e}")
-        return False
-def test_model_download():
-    """Test if the C-3PO model can be downloaded"""
-    logger.info("🤖 Testing C-3PO model availability...")
     try:
-        from huggingface_hub import repo_info
-        # Check if the repo exists and is accessible
-        info = repo_info(repo_id="Borcherding/XTTS-v2_C3PO")
-        logger.info(f"✅ C-3PO model accessible: {info.id}")
-        logger.info(f"   Last modified: {info.last_modified}")
-        return True
-    except Exception as e:
-        logger.error(f"❌ C-3PO model not accessible: {e}")
-        return False
-def start_api_server():
-    """Start the FastAPI server"""
-    logger.info("🚀 Starting C-3PO TTS API server...")
-    try:
-        # Import and run the API
         import uvicorn
         from coqui_api import app
         logger.info("🎭 C-3PO TTS API starting on http://localhost:7860")
-        logger.info("📖 API documentation available at http://localhost:7860/docs")
-        uvicorn.run(
-            app,
-            host="0.0.0.0",
-            port=7860,
-            log_level="info"
-        )
     except Exception as e:
-        logger.error(f"❌ Failed to start API server: {e}")
-        return False
 def main():
     """Main startup sequence"""
-    print("🤖 C-3PO TTS API Startup")
-    print("=" * 50)
-    # Step 1: Check dependencies
-    if not check_dependencies():
-        logger.info("📦 Attempting to install dependencies...")
-        if not install_dependencies():
-            logger.error("❌ Failed to install dependencies. Exiting.")
-            sys.exit(1)
-    # Step 2: Setup environment
     setup_environment()
-    # Step 3: Check system resources
-    has_gpu = check_gpu()
-    has_space = check_disk_space()
-    if not has_space:
-        logger.error("❌ Insufficient disk space. Exiting.")
-        sys.exit(1)
-    # Step 4: Test model availability
-    if not test_model_download():
-        logger.warning("⚠️  C-3PO model may not be accessible")
-        logger.warning("   The API will fall back to standard XTTS v2")
-    # Step 5: Start the server
-    print("\n" + "=" * 50)
-    logger.info("🎬 All checks passed! Starting C-3PO TTS API...")
-    print("=" * 50)
     try:
-        start_api_server()
     except KeyboardInterrupt:
         logger.info("\n🛑 Server stopped by user")
     except Exception as e:

 #!/usr/bin/env python3
 """
+Simple startup script for C-3PO TTS API
 """
 import os
 import sys
 import logging
 # Configure logging
+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 def setup_environment():
+    """Set up required environment variables"""
     os.environ["COQUI_TOS_AGREED"] = "1"
     os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
     logger.info("🌍 Environment configured")
+def start_api():
+    """Start the C-3PO TTS API"""
+    logger.info("🤖 Starting C-3PO TTS API...")
     try:
         import uvicorn
         from coqui_api import app
         logger.info("🎭 C-3PO TTS API starting on http://localhost:7860")
+        logger.info("📖 API documentation: http://localhost:7860/docs")
+        uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
+    except ImportError as e:
+        logger.error(f"❌ Missing dependency: {e}")
+        logger.info("💡 Install with: pip install -r requirements.txt")
+        sys.exit(1)
     except Exception as e:
+        logger.error(f"❌ Failed to start API: {e}")
+        sys.exit(1)
 def main():
     """Main startup sequence"""
+    print("🤖 C-3PO TTS API")
+    print("=" * 30)
     setup_environment()
     try:
+        start_api()
     except KeyboardInterrupt:
         logger.info("\n🛑 Server stopped by user")
     except Exception as e:

startup.py DELETED Viewed

@@ -1,120 +0,0 @@
-#!/usr/bin/env python3
-"""
-Startup script for Kokoro TTS API on Hugging Face Spaces
-"""
-import os
-import sys
-import logging
-import subprocess
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-def check_environment():
-    """Check the environment and permissions"""
-    logger.info("=== Environment Check ===")
-    # Check if running on HF Spaces
-    space_id = os.environ.get('SPACE_ID')
-    if space_id:
-        logger.info(f"Running on Hugging Face Spaces: {space_id}")
-    else:
-        logger.info("Not running on Hugging Face Spaces")
-    # Check Python version
-    logger.info(f"Python version: {sys.version}")
-    # Check current user and home directory
-    logger.info(f"Current user: {os.getenv('USER', 'unknown')}")
-    logger.info(f"Home directory: {os.path.expanduser('~')}")
-    logger.info(f"Current working directory: {os.getcwd()}")
-    # Check available disk space
-    try:
-        result = subprocess.run(['df', '-h', '/tmp'], capture_output=True, text=True)
-        logger.info(f"Disk space in /tmp:\n{result.stdout}")
-    except Exception as e:
-        logger.warning(f"Could not check disk space: {e}")
-    # Check write permissions for important directories
-    test_dirs = ['/tmp', os.path.expanduser('~'), os.getcwd()]
-    for test_dir in test_dirs:
-        try:
-            test_file = os.path.join(test_dir, 'test_write.tmp')
-            with open(test_file, 'w') as f:
-                f.write('test')
-            os.remove(test_file)
-            logger.info(f"✅ Write permission OK: {test_dir}")
-        except Exception as e:
-            logger.warning(f"❌ Write permission failed: {test_dir} - {e}")
-def check_dependencies():
-    """Check if required packages are installed"""
-    logger.info("=== Checking dependencies ===")
-    required_packages = [
-        'kokoro',
-        'soundfile',
-        'torch',
-        'fastapi',
-        'uvicorn'
-    ]
-    for package in required_packages:
-        try:
-            __import__(package)
-            logger.info(f"✅ {package} is available")
-        except ImportError:
-            logger.error(f"❌ {package} is not available")
-def test_kokoro():
-    """Test Kokoro TTS functionality"""
-    logger.info("=== Testing Kokoro TTS ===")
-    try:
-        # Import after setting up environment
-        import app_config  # This will setup environment
-        from kokoro import KPipeline
-        logger.info("Initializing Kokoro pipeline...")
-        pipeline = KPipeline(lang_code='a')
-        logger.info("✅ Kokoro pipeline initialized successfully")
-        # Test generation
-        logger.info("Testing speech generation...")
-        text = "Hello, this is a test."
-        generator = pipeline(text, voice='af_heart')
-        for i, (gs, ps, audio) in enumerate(generator):
-            logger.info(f"✅ Generated audio segment {i}: gs={gs}, ps={ps}, audio shape: {audio.shape}")
-            break
-        logger.info("✅ Kokoro TTS test completed successfully")
-        return True
-    except Exception as e:
-        logger.error(f"❌ Kokoro TTS test failed: {e}")
-        import traceback
-        logger.error(f"Full traceback: {traceback.format_exc()}")
-        return False
-def main():
-    """Main startup function"""
-    logger.info("🚀 Starting Kokoro TTS API setup...")
-    check_environment()
-    check_dependencies()
-    if test_kokoro():
-        logger.info("🎉 All checks passed! Starting the API...")
-        # Import and start the app
-        import uvicorn
-        uvicorn.run("app:app", host="0.0.0.0", port=7860, log_level="info")
-    else:
-        logger.error("❌ Setup failed. Please check the logs above.")
-        sys.exit(1)
-if __name__ == "__main__":
-    main()

test.py DELETED Viewed

@@ -1,144 +0,0 @@
-import os
-import torch
-import torchaudio
-import subprocess
-# Set environment variables for CPU-only usage
-os.environ['COQUI_TOS_AGREED'] = '1'
-os.environ['NUMBA_DISABLE_JIT'] = '1'
-os.environ['FORCE_CPU'] = 'true'
-os.environ['CUDA_VISIBLE_DEVICES'] = ''
-# Fix PyTorch weights_only issue for XTTS
-import torch.serialization
-from TTS.tts.configs.xtts_config import XttsConfig
-torch.serialization.add_safe_globals([XttsConfig])
-from TTS.api import TTS
-from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
-from TTS.utils.generic_utils import get_user_data_dir
-print("Testing XTTS C3PO voice cloning...")
-# C3PO model path
-model_path = "XTTS-v2_C3PO/"
-config_path = "XTTS-v2_C3PO/config.json"
-# Check if model files exist, if not download them
-if not os.path.exists(config_path):
-    print("C3PO model not found locally, downloading...")
-    try:
-        subprocess.run([
-            "git", "clone",
-            "https://huggingface.co/Borcherding/XTTS-v2_C3PO",
-            "XTTS-v2_C3PO"
-        ], check=True)
-        print("C3PO model downloaded successfully")
-    except subprocess.CalledProcessError as e:
-        print(f"Failed to download C3PO model: {e}")
-        exit(1)
-# Load configuration
-config = XttsConfig()
-config.load_json(config_path)
-# Initialize and load model
-model = Xtts.init_from_config(config)
-model.load_checkpoint(
-    config,
-    checkpoint_path=os.path.join(model_path, "model.pth"),
-    vocab_path=os.path.join(model_path, "vocab.json"),
-    eval=True,
-)
-device = "cpu"  # Force CPU usage
-print(f"C3PO model loaded on {device} (forced CPU mode)")
-# Text to convert to speech
-text = "Hello there! I am C-3PO, human-cyborg relations. How may I assist you today?"
-# Look for reference audio in the C3PO model directory
-reference_audio_path = None
-for file in os.listdir(model_path):
-    if file.endswith(('.wav', '.mp3', '.m4a')):
-        reference_audio_path = os.path.join(model_path, file)
-        print(f"Found C3PO reference audio: {file}")
-        break
-# If no reference audio found, create a simple test reference
-if reference_audio_path is None:
-    print("No reference audio found in C3PO model, creating test reference...")
-    reference_audio_path = "test_reference.wav"
-    # Generate a simple sine wave as placeholder
-    import numpy as np
-    sample_rate = 24000
-    duration = 3  # seconds
-    frequency = 440  # Hz
-    t = np.linspace(0, duration, int(sample_rate * duration))
-    audio_data = 0.3 * np.sin(2 * np.pi * frequency * t)
-    # Save as WAV
-    torchaudio.save(reference_audio_path, torch.tensor(audio_data).unsqueeze(0), sample_rate)
-    print(f"Test reference audio created: {reference_audio_path}")
-try:
-    # Generate conditioning latents
-    print("Processing reference audio...")
-    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
-        audio_path=reference_audio_path,
-        gpt_cond_len=30,
-        gpt_cond_chunk_len=4,
-        max_ref_length=60
-    )
-    # Generate speech
-    print("Generating C3PO speech...")
-    out = model.inference(
-        text,
-        "en",  # language
-        gpt_cond_latent,
-        speaker_embedding,
-        repetition_penalty=5.0,
-        temperature=0.75,
-    )
-    # Save output
-    output_path = "c3po_test_output.wav"
-    torchaudio.save(output_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
-    print(f"C3PO speech generated successfully! Saved as: {output_path}")
-    # Test multilingual capabilities
-    print("\nTesting multilingual C3PO...")
-    multilingual_tests = [
-        ("es", "Hola, soy C-3PO. Domino más de seis millones de formas de comunicación."),
-        ("fr", "Bonjour, je suis C-3PO. Je maîtrise plus de six millions de formes de communication."),
-        ("de", "Hallo, ich bin C-3PO. Ich beherrsche über sechs Millionen Kommunikationsformen."),
-    ]
-    for lang, test_text in multilingual_tests:
-        print(f"Generating {lang.upper()} speech...")
-        out = model.inference(
-            test_text,
-            lang,
-            gpt_cond_latent,
-            speaker_embedding,
-            repetition_penalty=5.0,
-            temperature=0.75,
-        )
-        output_path = f"c3po_test_{lang}.wav"
-        torchaudio.save(output_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
-        print(f"C3PO {lang.upper()} speech saved as: {output_path}")
-except Exception as e:
-    print(f"Error during speech generation: {e}")
-    import traceback
-    traceback.print_exc()
-print("XTTS C3PO test completed!")
-print("\nGenerated files:")
-for file in os.listdir("."):
-    if file.startswith("c3po_test") and file.endswith(".wav"):
-        print(f"  - {file}")

test_build.py ADDED Viewed

	@@ -0,0 +1,69 @@

+#!/usr/bin/env python3
+"""
+Simple build test for C-3PO TTS API
+Tests if all dependencies can be imported
+"""
+def test_imports():
+    """Test if all required packages can be imported"""
+    print("🔍 Testing imports...")
+    try:
+        import fastapi
+        print("✅ FastAPI")
+        import uvicorn
+        print("✅ Uvicorn")
+        import torch
+        print("✅ PyTorch")
+        import torchaudio
+        print("✅ TorchAudio")
+        import TTS
+        print("✅ Coqui TTS")
+        import huggingface_hub
+        print("✅ Hugging Face Hub")
+        import pydantic
+        print("✅ Pydantic")
+        return True
+    except ImportError as e:
+        print(f"❌ Import failed: {e}")
+        return False
+def test_api_creation():
+    """Test if the API can be created without errors"""
+    print("\n🚀 Testing API creation...")
+    try:
+        from coqui_api import app
+        print("✅ API created successfully")
+        return True
+    except Exception as e:
+        print(f"❌ API creation failed: {e}")
+        return False
+def main():
+    """Run build tests"""
+    print("🧪 C-3PO TTS Build Test")
+    print("=" * 30)
+    import_ok = test_imports()
+    api_ok = test_api_creation()
+    print("\n" + "=" * 30)
+    if import_ok and api_ok:
+        print("🎉 All tests passed! Ready to deploy.")
+        return 0
+    else:
+        print("❌ Some tests failed. Check dependencies.")
+        return 1
+if __name__ == "__main__":
+    exit(main())

test_coqui_api.py DELETED Viewed

@@ -1,146 +0,0 @@
-import requests
-import os
-import time
-# API base URL (update this to your deployed Hugging Face Space URL)
-BASE_URL = "http://localhost:7860"  # Change to your HF Space URL when deployed
-def test_health():
-    """Test the health endpoint"""
-    print("🔍 Testing health endpoint...")
-    try:
-        response = requests.get(f"{BASE_URL}/health")
-        if response.status_code == 200:
-            print("✅ Health check passed!")
-            print(f"Response: {response.json()}")
-        else:
-            print(f"❌ Health check failed: {response.status_code}")
-            print(f"Response: {response.text}")
-    except Exception as e:
-        print(f"❌ Health check error: {e}")
-def test_list_models():
-    """Test the models endpoint"""
-    print("\n🔍 Testing models endpoint...")
-    try:
-        response = requests.get(f"{BASE_URL}/models")
-        if response.status_code == 200:
-            models = response.json()
-            print("✅ Models endpoint working!")
-            print(f"Found {len(models.get('models', []))} models")
-            # Show first 5 models
-            for i, model in enumerate(models.get('models', [])[:5]):
-                print(f"  {i+1}. {model}")
-        else:
-            print(f"❌ Models endpoint failed: {response.status_code}")
-    except Exception as e:
-        print(f"❌ Models endpoint error: {e}")
-def test_simple_tts():
-    """Test simple text-to-speech without voice cloning"""
-    print("\n🔍 Testing simple TTS...")
-    try:
-        data = {
-            "text": "Hello world! This is a test of Coqui TTS.",
-            "language": "en"
-        }
-        response = requests.post(f"{BASE_URL}/tts", data=data)
-        if response.status_code == 200:
-            # Save the audio file
-            output_file = "simple_tts_output.wav"
-            with open(output_file, "wb") as f:
-                f.write(response.content)
-            print(f"✅ Simple TTS successful! Audio saved to: {output_file}")
-            print(f"File size: {len(response.content)} bytes")
-        else:
-            print(f"❌ Simple TTS failed: {response.status_code}")
-            print(f"Response: {response.text}")
-    except Exception as e:
-        print(f"❌ Simple TTS error: {e}")
-def test_voice_cloning(speaker_file_path=None):
-    """Test voice cloning with uploaded speaker file"""
-    if not speaker_file_path or not os.path.exists(speaker_file_path):
-        print("\n⚠️  Skipping voice cloning test - no speaker file provided")
-        print("   To test voice cloning, provide a .wav file path")
-        return
-    print(f"\n🔍 Testing voice cloning with: {speaker_file_path}")
-    try:
-        data = {
-            "text": "This is voice cloning using Coqui TTS. The voice should match the reference audio.",
-            "language": "en"
-        }
-        with open(speaker_file_path, "rb") as f:
-            files = {"speaker_file": f}
-            response = requests.post(f"{BASE_URL}/tts", data=data, files=files)
-        if response.status_code == 200:
-            # Save the cloned audio
-            output_file = "voice_cloned_output.wav"
-            with open(output_file, "wb") as f:
-                f.write(response.content)
-            print(f"✅ Voice cloning successful! Audio saved to: {output_file}")
-            print(f"File size: {len(response.content)} bytes")
-        else:
-            print(f"❌ Voice cloning failed: {response.status_code}")
-            print(f"Response: {response.text}")
-    except Exception as e:
-        print(f"❌ Voice cloning error: {e}")
-def test_json_tts():
-    """Test JSON endpoint"""
-    print("\n🔍 Testing JSON TTS endpoint...")
-    try:
-        import json
-        data = {
-            "text": "This is a JSON request test for Coqui TTS API.",
-            "language": "en"
-        }
-        response = requests.post(
-            f"{BASE_URL}/tts-json",
-            headers={"Content-Type": "application/json"},
-            data=json.dumps(data)
-        )
-        if response.status_code == 200:
-            output_file = "json_tts_output.wav"
-            with open(output_file, "wb") as f:
-                f.write(response.content)
-            print(f"✅ JSON TTS successful! Audio saved to: {output_file}")
-            print(f"File size: {len(response.content)} bytes")
-        else:
-            print(f"❌ JSON TTS failed: {response.status_code}")
-            print(f"Response: {response.text}")
-    except Exception as e:
-        print(f"❌ JSON TTS error: {e}")
-def main():
-    print("🐸 Testing Coqui TTS API")
-    print("=" * 50)
-    # Test all endpoints
-    test_health()
-    test_list_models()
-    test_simple_tts()
-    test_json_tts()
-    # Test voice cloning if speaker file is available
-    # You can specify a speaker file path here
-    speaker_file = None  # Change to your speaker file path
-    test_voice_cloning(speaker_file)
-    print("\n🎉 API testing completed!")
-    print("\nTo test voice cloning:")
-    print("1. Record a short audio sample (5-10 seconds)")
-    print("2. Save it as a .wav file")
-    print("3. Update speaker_file variable with the file path")
-    print("4. Run the test again")
-if __name__ == "__main__":
-    main()

test_coqui_tts.py DELETED Viewed

@@ -1,99 +0,0 @@
-import torch
-from TTS.api import TTS
-import os
-def test_coqui_tts():
-    """Test Coqui TTS functionality"""
-    # Get device
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Using device: {device}")
-    try:
-        # List available 🐸TTS models
-        print("\n=== Available TTS Models ===")
-        tts_instance = TTS()
-        models = tts_instance.list_models()
-        # Print first 10 models to avoid overwhelming output
-        print("First 10 available models:")
-        for i, model in enumerate(models[:10]):
-            print(f"{i+1}. {model}")
-        if len(models) > 10:
-            print(f"... and {len(models) - 10} more models")
-    except Exception as e:
-        print(f"Error listing models: {e}")
-        return
-    try:
-        # Initialize TTS with XTTS v2 model
-        print("\n=== Initializing XTTS v2 Model ===")
-        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
-        print("XTTS v2 model loaded successfully!")
-        # List speakers if available
-        print("\n=== Available Speakers ===")
-        if hasattr(tts, 'speakers') and tts.speakers:
-            print("Available speakers:")
-            for speaker in tts.speakers[:10]:  # Show first 10
-                print(f"- {speaker}")
-            if len(tts.speakers) > 10:
-                print(f"... and {len(tts.speakers) - 10} more speakers")
-        else:
-            print("No preset speakers available or speakers list is empty")
-    except Exception as e:
-        print(f"Error initializing XTTS v2 model: {e}")
-        print("This might be due to model download requirements or missing dependencies")
-        return
-    try:
-        # Test TTS to file with preset speaker (if available)
-        print("\n=== Testing TTS to File ===")
-        output_file = "test_output.wav"
-        # Check if we have speakers available
-        if hasattr(tts, 'speakers') and tts.speakers:
-            # Use first available speaker
-            speaker_name = tts.speakers[0]
-            print(f"Using speaker: {speaker_name}")
-            tts.tts_to_file(
-                text="Hello world! This is a test of Coqui TTS library.",
-                speaker=speaker_name,
-                language="en",
-                file_path=output_file
-            )
-        else:
-            # Try without speaker specification
-            print("No speakers available, trying without speaker specification...")
-            tts.tts_to_file(
-                text="Hello world! This is a test of Coqui TTS library.",
-                language="en",
-                file_path=output_file
-            )
-        if os.path.exists(output_file):
-            print(f"✅ TTS successful! Audio saved to: {output_file}")
-            file_size = os.path.getsize(output_file)
-            print(f"File size: {file_size} bytes")
-        else:
-            print("❌ TTS failed - output file not created")
-    except Exception as e:
-        print(f"Error during TTS generation: {e}")
-    # Note about voice cloning
-    print("\n=== Voice Cloning Information ===")
-    print("To test voice cloning, you would need:")
-    print("1. A reference audio file (speaker_wav parameter)")
-    print("2. Use tts.tts() method instead of tts_to_file()")
-    print("Example:")
-    print('wav = tts.tts(text="Hello!", speaker_wav="reference.wav", language="en")')
-if __name__ == "__main__":
-    print("🐸 Testing Coqui TTS Library")
-    print("=" * 50)
-    test_coqui_tts()

test_kokoro_install.py DELETED Viewed

@@ -1,86 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test script to verify Kokoro TTS installation and functionality.
-"""
-import os
-# Set basic environment variables
-os.environ['NUMBA_DISABLE_JIT'] = '1'
-def test_kokoro_import():
-    """Test if Kokoro can be imported"""
-    try:
-        from kokoro import KPipeline
-        import soundfile as sf
-        import torch
-        print("✅ All required packages imported successfully!")
-        return True
-    except ImportError as e:
-        print(f"❌ Import error: {e}")
-        return False
-def test_kokoro_pipeline():
-    """Test if Kokoro pipeline can be initialized"""
-    try:
-        from kokoro import KPipeline
-        pipeline = KPipeline(lang_code='a')
-        print("✅ Kokoro pipeline initialized successfully!")
-        return True
-    except Exception as e:
-        print(f"❌ Pipeline initialization error: {e}")
-        return False
-def test_kokoro_generation():
-    """Test if Kokoro can generate speech"""
-    try:
-        from kokoro import KPipeline
-        import soundfile as sf
-        pipeline = KPipeline(lang_code='a')
-        text = "Hello, this is a test of Kokoro TTS."
-        generator = pipeline(text, voice='af_heart')
-        for i, (gs, ps, audio) in enumerate(generator):
-            print(f"✅ Generated audio segment {i}: gs={gs}, ps={ps}")
-            # Save test audio
-            sf.write('test_kokoro.wav', audio, 24000)
-            print("✅ Test audio saved as 'test_kokoro.wav'")
-            break  # Just test the first segment
-        return True
-    except Exception as e:
-        print(f"❌ Speech generation error: {e}")
-        return False
-def main():
-    """Run all tests"""
-    print("🎤 Testing Kokoro TTS Installation")
-    print("=" * 40)
-    tests = [
-        ("Import Test", test_kokoro_import),
-        ("Pipeline Test", test_kokoro_pipeline),
-        ("Generation Test", test_kokoro_generation)
-    ]
-    passed = 0
-    total = len(tests)
-    for test_name, test_func in tests:
-        print(f"\n🔍 Running {test_name}...")
-        if test_func():
-            passed += 1
-        else:
-            print(f"❌ {test_name} failed!")
-    print(f"\n📊 Results: {passed}/{total} tests passed")
-    if passed == total:
-        print("🎉 All tests passed! Kokoro TTS is ready to use.")
-    else:
-        print("⚠️  Some tests failed. Please check the installation.")
-if __name__ == "__main__":
-    main()