Spaces:
Configuration error
Configuration error
remove academic and change to iic/CosyVoice_ttsfrd
Browse files- .gitmodules +0 -3
- README.md +4 -4
- cosyvoice/cli/frontend.py +1 -1
- cosyvoice/hifigan/generator.py +2 -2
- cosyvoice/utils/common.py +10 -0
- runtime/python/Dockerfile +5 -2
- third_party/AcademiCodec +0 -1
.gitmodules
CHANGED
|
@@ -1,6 +1,3 @@
|
|
| 1 |
-
[submodule "third_party/AcademiCodec"]
|
| 2 |
-
path = third_party/AcademiCodec
|
| 3 |
-
url = https://github.com/yangdongchao/AcademiCodec.git
|
| 4 |
[submodule "third_party/Matcha-TTS"]
|
| 5 |
path = third_party/Matcha-TTS
|
| 6 |
url = https://github.com/shivammehta25/Matcha-TTS.git
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
[submodule "third_party/Matcha-TTS"]
|
| 2 |
path = third_party/Matcha-TTS
|
| 3 |
url = https://github.com/shivammehta25/Matcha-TTS.git
|
README.md
CHANGED
|
@@ -33,7 +33,7 @@ sudo yum install sox sox-devel
|
|
| 33 |
|
| 34 |
**Model download**
|
| 35 |
|
| 36 |
-
We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` model and `
|
| 37 |
|
| 38 |
If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
|
| 39 |
|
|
@@ -43,7 +43,7 @@ from modelscope import snapshot_download
|
|
| 43 |
snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
|
| 44 |
snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
|
| 45 |
snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
|
| 46 |
-
snapshot_download('
|
| 47 |
```
|
| 48 |
|
| 49 |
``` sh
|
|
@@ -52,12 +52,12 @@ mkdir -p pretrained_models
|
|
| 52 |
git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
|
| 53 |
git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
|
| 54 |
git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
|
| 55 |
-
git clone https://www.modelscope.cn/
|
| 56 |
```
|
| 57 |
|
| 58 |
Unzip the `ttsfrd` resource and install the `ttsfrd` package
|
| 59 |
``` sh
|
| 60 |
-
cd pretrained_models/
|
| 61 |
unzip resource.zip -d .
|
| 62 |
pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
|
| 63 |
```
|
|
|
|
| 33 |
|
| 34 |
**Model download**
|
| 35 |
|
| 36 |
+
We strongly recommend that you download our pretrained `CosyVoice-300M`, `CosyVoice-300M-SFT`, and `CosyVoice-300M-Instruct` models and the `CosyVoice-ttsfrd` resource.
|
| 37 |
|
| 38 |
If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
|
| 39 |
|
|
|
|
| 43 |
snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
|
| 44 |
snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
|
| 45 |
snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
|
| 46 |
+
snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
|
| 47 |
```
|
| 48 |
|
| 49 |
``` sh
|
|
|
|
| 52 |
git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
|
| 53 |
git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
|
| 54 |
git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
|
| 55 |
+
git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git pretrained_models/CosyVoice-ttsfrd
|
| 56 |
```
|
| 57 |
|
| 58 |
Unzip the `ttsfrd` resource and install the `ttsfrd` package
|
| 59 |
``` sh
|
| 60 |
+
cd pretrained_models/CosyVoice-ttsfrd/
|
| 61 |
unzip resource.zip -d .
|
| 62 |
pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
|
| 63 |
```
|
cosyvoice/cli/frontend.py
CHANGED
|
@@ -50,7 +50,7 @@ class CosyVoiceFrontEnd:
|
|
| 50 |
self.inflect_parser = inflect.engine()
|
| 51 |
self.frd = ttsfrd.TtsFrontendEngine()
|
| 52 |
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 53 |
-
assert self.frd.initialize('{}/../../pretrained_models/
|
| 54 |
self.frd.set_lang_type('pinyin')
|
| 55 |
self.frd.enable_pinyin_mix(True)
|
| 56 |
self.frd.set_breakmodel_index(1)
|
|
|
|
| 50 |
self.inflect_parser = inflect.engine()
|
| 51 |
self.frd = ttsfrd.TtsFrontendEngine()
|
| 52 |
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 53 |
+
assert self.frd.initialize('{}/../../pretrained_models/CosyVoice-ttsfrd/resource'.format(ROOT_DIR)) is True, 'failed to initialize ttsfrd resource'
|
| 54 |
self.frd.set_lang_type('pinyin')
|
| 55 |
self.frd.enable_pinyin_mix(True)
|
| 56 |
self.frd.set_breakmodel_index(1)
|
cosyvoice/hifigan/generator.py
CHANGED
|
@@ -27,8 +27,8 @@ from torch.nn.utils import weight_norm
|
|
| 27 |
from torch.distributions.uniform import Uniform
|
| 28 |
|
| 29 |
from cosyvoice.transformer.activation import Snake
|
| 30 |
-
from
|
| 31 |
-
from
|
| 32 |
|
| 33 |
|
| 34 |
"""hifigan based generator implementation.
|
|
|
|
| 27 |
from torch.distributions.uniform import Uniform
|
| 28 |
|
| 29 |
from cosyvoice.transformer.activation import Snake
|
| 30 |
+
from cosyvoice.utils.common import get_padding
|
| 31 |
+
from cosyvoice.utils.common import init_weights
|
| 32 |
|
| 33 |
|
| 34 |
"""hifigan based generator implementation.
|
cosyvoice/utils/common.py
CHANGED
|
@@ -91,3 +91,13 @@ def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor,
|
|
| 91 |
pad_pred.masked_select(mask) == pad_targets.masked_select(mask))
|
| 92 |
denominator = torch.sum(mask)
|
| 93 |
return (numerator / denominator).detach()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
pad_pred.masked_select(mask) == pad_targets.masked_select(mask))
|
| 92 |
denominator = torch.sum(mask)
|
| 93 |
return (numerator / denominator).detach()
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def get_padding(kernel_size, dilation=1):
|
| 97 |
+
return int((kernel_size * dilation - dilation) / 2)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def init_weights(m, mean=0.0, std=0.01):
|
| 101 |
+
classname = m.__class__.__name__
|
| 102 |
+
if classname.find("Conv") != -1:
|
| 103 |
+
m.weight.data.normal_(mean, std)
|
runtime/python/Dockerfile
CHANGED
|
@@ -5,8 +5,11 @@ WORKDIR /opt/CosyVoice
|
|
| 5 |
|
| 6 |
RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
|
| 7 |
RUN apt-get update -y
|
| 8 |
-
RUN apt-get -y install python3-dev cmake python3-pip git
|
| 9 |
RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
|
| 10 |
-
RUN cd CosyVoice && pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
|
|
|
|
|
|
|
|
|
|
| 11 |
RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
|
| 12 |
CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"]
|
|
|
|
| 5 |
|
| 6 |
RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
|
| 7 |
RUN apt-get update -y
|
| 8 |
+
RUN apt-get -y install python3-dev cmake python3-pip git unzip
|
| 9 |
RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
|
| 10 |
+
RUN cd CosyVoice && pip3 install --default-timeout=3600 -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
|
| 11 |
+
RUN apt install git-lfs && git lfs install
|
| 12 |
+
RUN cd CosyVoice && git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git pretrained_models/CosyVoice-ttsfrd
|
| 13 |
+
RUN cd CosyVoice/pretrained_models/CosyVoice-ttsfrd && unzip resource.zip -d . && pip3 install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
|
| 14 |
RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
|
| 15 |
CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"]
|
third_party/AcademiCodec
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Subproject commit b6ac134735f6079543db959a60eb77a7bab4277b
|
|
|
|
|
|