Avinyaa committed on
Commit
6a83fff
·
1 Parent(s): a7aae29
Files changed (3) hide show
  1. README.md +50 -2
  2. requirements.txt +2 -8
  3. test.py +8 -9
README.md CHANGED
@@ -155,6 +155,40 @@ The C3PO model supports all XTTS-v2 languages:
155
 
156
  ## Setup
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  ### Hugging Face Spaces Deployment
159
 
160
  This API is optimized for Hugging Face Spaces with:
@@ -255,6 +289,13 @@ Automatically configured:
255
 
256
  ## Troubleshooting
257
 
 
 
 
 
 
 
 
258
  ### PyTorch Loading Issues
259
  The API includes fixes for PyTorch 2.6's `weights_only=True` default. If you encounter loading issues, ensure the compatibility fix is applied.
260
 
@@ -270,9 +311,16 @@ If the C3PO model fails to download:
270
  - Ensure reference audio is 3-10 seconds long
271
 
272
  ### Memory Issues
273
- - Use CPU mode for lower memory usage: set `CUDA_VISIBLE_DEVICES=""`
 
274
  - Reduce text length for batch processing
275
- - Consider using GPU with sufficient VRAM (4GB+ recommended)
 
 
 
 
 
 
276
 
277
  ## License
278
 
 
155
 
156
  ## Setup
157
 
158
+ ### CPU-Only Installation (Recommended for most users)
159
+
160
+ 1. **Install system dependencies** (for CPU-only usage, no GPU required):
161
+ ```bash
162
+ # Ubuntu/Debian
163
+ sudo apt-get install espeak-ng ffmpeg git git-lfs
164
+
165
+ # macOS
166
+ brew install espeak ffmpeg git git-lfs
167
+ ```
168
+
169
+ 2. **Install CPU-only PyTorch and dependencies:**
170
+ ```bash
171
+ # Option 1: Use the provided script
172
+ chmod +x install_cpu.sh
173
+ ./install_cpu.sh
174
+
175
+ # Option 2: Manual installation
176
+ pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
177
+ pip install -r requirements.txt
178
+ python -m unidic download
179
+ ```
180
+
181
+ 3. **Set CPU-only environment variables:**
182
+ ```bash
183
+ export FORCE_CPU=true
184
+ export CUDA_VISIBLE_DEVICES=""
185
+ ```
186
+
187
+ 4. **Run the API:**
188
+ ```bash
189
+ uvicorn app:app --host 0.0.0.0 --port 7860
190
+ ```
191
+
192
  ### Hugging Face Spaces Deployment
193
 
194
  This API is optimized for Hugging Face Spaces with:
 
289
 
290
  ## Troubleshooting
291
 
292
+ ### CPU Performance
293
+ When running on CPU:
294
+ - Speech generation will be slower than on a GPU (30-60 seconds vs 3-5 seconds)
295
+ - Memory usage is lower (2-4GB RAM vs 4-8GB VRAM)
296
+ - No CUDA installation required
297
+ - Works on any system with sufficient RAM
298
+
299
  ### PyTorch Loading Issues
300
  The API includes fixes for PyTorch 2.6's `weights_only=True` default. If you encounter loading issues, ensure the compatibility fix is applied.
301
 
 
311
  - Ensure reference audio is 3-10 seconds long
312
 
313
  ### Memory Issues
314
+ - **CPU Mode**: Requires 2-4GB RAM, works on most modern computers
315
+ - **GPU Mode**: Requires 4GB+ VRAM for optimal performance
316
  - Reduce text length for batch processing
317
+ - Use CPU mode by setting the `FORCE_CPU=true` environment variable
318
+
319
+ ### CPU-Only Installation Issues
320
+ If you encounter GPU-related errors:
321
+ 1. Set environment variables: `export FORCE_CPU=true CUDA_VISIBLE_DEVICES=""`
322
+ 2. Install CPU-only PyTorch: `pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu`
323
+ 3. Restart the API after setting environment variables
324
 
325
  ## License
326
 
requirements.txt CHANGED
@@ -7,11 +7,5 @@ mecab-python3==1.0.6
7
  unidic-lite==1.0.8
8
  unidic==1.1.0
9
  langid
10
- pydub
11
- fastapi
12
- uvicorn[standard]
13
- torch
14
- torchaudio
15
- soundfile
16
- scipy
17
- numpy
 
7
  unidic-lite==1.0.8
8
  unidic==1.1.0
9
  langid
10
+ uvicorn
11
+ pydub
 
 
 
 
 
 
test.py CHANGED
@@ -3,15 +3,17 @@ import torch
3
  import torchaudio
4
  import subprocess
5
 
 
 
 
 
 
 
6
  # Fix PyTorch weights_only issue for XTTS
7
  import torch.serialization
8
  from TTS.tts.configs.xtts_config import XttsConfig
9
  torch.serialization.add_safe_globals([XttsConfig])
10
 
11
- # Set environment variables
12
- os.environ['COQUI_TOS_AGREED'] = '1'
13
- os.environ['NUMBA_DISABLE_JIT'] = '1'
14
-
15
  from TTS.api import TTS
16
  from TTS.tts.configs.xtts_config import XttsConfig
17
  from TTS.tts.models.xtts import Xtts
@@ -50,11 +52,8 @@ model.load_checkpoint(
50
  eval=True,
51
  )
52
 
53
- device = "cuda" if torch.cuda.is_available() else "cpu"
54
- if device == "cuda":
55
- model.cuda()
56
-
57
- print(f"C3PO model loaded on {device}")
58
 
59
  # Text to convert to speech
60
  text = "Hello there! I am C-3PO, human-cyborg relations. How may I assist you today?"
 
3
  import torchaudio
4
  import subprocess
5
 
6
+ # Set environment variables for CPU-only usage
7
+ os.environ['COQUI_TOS_AGREED'] = '1'
8
+ os.environ['NUMBA_DISABLE_JIT'] = '1'
9
+ os.environ['FORCE_CPU'] = 'true'
10
+ os.environ['CUDA_VISIBLE_DEVICES'] = ''
11
+
12
  # Fix PyTorch weights_only issue for XTTS
13
  import torch.serialization
14
  from TTS.tts.configs.xtts_config import XttsConfig
15
  torch.serialization.add_safe_globals([XttsConfig])
16
 
 
 
 
 
17
  from TTS.api import TTS
18
  from TTS.tts.configs.xtts_config import XttsConfig
19
  from TTS.tts.models.xtts import Xtts
 
52
  eval=True,
53
  )
54
 
55
+ device = "cpu" # Force CPU usage
56
+ print(f"C3PO model loaded on {device} (forced CPU mode)")
 
 
 
57
 
58
  # Text to convert to speech
59
  text = "Hello there! I am C-3PO, human-cyborg relations. How may I assist you today?"