add download models script and fastapi server to serve tts

Files changed:
- README.md +12 -8
- download.py +6 -0
- main.py +40 -0
- requirements.txt +3 -1
README.md
CHANGED

@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
 
 If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
 
-``` python
-# Download models via the ModelScope SDK
-from modelscope import snapshot_download
-snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
-snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
-snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
-snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
+Download the models with the Python script.
+``` shell
+python download.py
 ```
 
+Download the models with git; install `git lfs` first.
 ``` sh
-# Download models via git; make sure git lfs is installed
 mkdir -p pretrained_models
 git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
 git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT

@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
 For advanced users, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
 You can get familiar with CosyVoice by following this recipe.
 
+**Serve with FastAPI**
+```sh
+# For development
+fastapi dev --port 3003
+# For production
+fastapi run --port 3003
+```
+
 **Build for deployment**
 
 Optionally, if you want to use grpc for service deployment,
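The two `fastapi` commands added above are expected to pick up the `app` object defined in `main.py` (added below in this commit). As a rough illustration only, the following sketch exercises the resulting service from Python; it assumes the server is listening on localhost:3003 and uses the `requests` package, which is not listed in requirements.txt.

```python
# Hypothetical client for the endpoints defined in main.py below.
# Assumes the server is running locally on port 3003 (see the README
# commands above) and that `requests` is installed.
import requests

BASE_URL = "http://localhost:3003"

# Ask the service which speaker roles the SFT model provides.
roles = requests.get(f"{BASE_URL}/api/voice/roles", timeout=30).json()["roles"]
print("available roles:", roles)

# Synthesize one sentence with the first role and store the WAV response.
resp = requests.get(
    f"{BASE_URL}/api/voice/tts",
    params={"query": "Hello, this is a CosyVoice test.", "role": roles[0]},
    timeout=300,
)
resp.raise_for_status()
with open("tts_output.wav", "wb") as f:
    f.write(resp.content)
print("wrote tts_output.wav")
```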
download.py
ADDED

@@ -0,0 +1,6 @@
+# Download models via the ModelScope SDK
+from modelscope import snapshot_download
+snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
+snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
+snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
+snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
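`download.py` is the same `snapshot_download` block that this commit removes from the README, so `python download.py` fetches the four ModelScope repositories into `pretrained_models/` before the server is started.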
main.py
ADDED

@@ -0,0 +1,40 @@
+import io, time
+from fastapi import FastAPI, Response
+from fastapi.responses import HTMLResponse
+from cosyvoice.cli.cosyvoice import CosyVoice
+import torchaudio
+
+cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
+# sft usage
+print(cosyvoice.list_avaliable_spks())
+app = FastAPI()
+
+@app.get("/api/voice/tts")
+async def tts(query: str, role: str):
+    start = time.process_time()
+    output = cosyvoice.inference_sft(query, role)
+    end = time.process_time()
+    print("infer time:", end - start, "seconds")
+    buffer = io.BytesIO()
+    torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
+    buffer.seek(0)
+    return Response(content=buffer.read(-1), media_type="audio/wav")
+
+@app.get("/api/voice/roles")
+async def roles():
+    return {"roles": cosyvoice.list_avaliable_spks()}
+
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    return """
+<!DOCTYPE html>
+<html lang=zh-cn>
+<head>
+<meta charset=utf-8>
+<title>Api information</title>
+</head>
+<body>
+Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
+</body>
+</html>
+"""
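The TTS handler keeps everything in memory: `torchaudio.save` writes the synthesized `tts_speech` tensor into a `BytesIO` buffer as a 22.05 kHz WAV, and the buffer's bytes are returned in a plain `Response` with `media_type="audio/wav"`, so no temporary files are written. The landing page at `/` simply points clients to `./docs`, the interactive API documentation that FastAPI generates automatically for the two endpoints.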
requirements.txt
CHANGED

@@ -25,4 +25,6 @@ soundfile==0.12.1
 tensorboard==2.14.0
 torch==2.0.1
 torchaudio==2.0.2
-wget==3.2
+wget==3.2
+fastapi==0.111.0
+fastapi-cli==0.0.4