Spaces:

fishaudio
/

fish-diffusion

Runtime error

Asteriski committed on Jun 8, 2023

Commit

49a207c

1 Parent(s): 1936ce4

Add ALYS + voice provider credits

A conversion of the open-source vocal synth ALYS. See https://vocalsynth.fandom.com/wiki/ALYS and https://blog.phundrak.com/open-sourcing-alys

Also adds voice provider credits to Tohoku Kiritan and Tohoku Itako, No.7, Namine Ritsu, Azure Cobalt, and ALYS.

Files changed (3) hide show

checkpoints/ALYS.ckpt +3 -0
config.yaml +14 -6
configs/ALYS.py +48 -0

checkpoints/ALYS.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:475fb4b9f56f8d14812ee78e4d5b39b2e58f60d8d0350e84587ed21a9ba96fca
+size 409439345

config.yaml CHANGED Viewed

@@ -18,7 +18,7 @@ models:
     checkpoint: checkpoints/Kiritan.ckpt
     readme: |
       This model is trained on the Tohoku Kiritan dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
-      It has a cute, yet powerful voice.
     default_speaker: "kiritan"
   - name: "Tohoku Itako (Feminine)"
@@ -26,7 +26,7 @@ models:
     checkpoint: checkpoints/Itako.ckpt
     readme: |
       This model is trained on the Tohoku Itako dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
-      It has a bright and whispery voice.
     default_speaker: "itako"
   - name: "No.7 (Feminine)"
@@ -34,7 +34,7 @@ models:
     checkpoint: checkpoints/Seven.ckpt
     readme: |
       This model is trained on the No.7 dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
-      It has a strong and sharp voice.
     default_speaker: "seven"
   - name: "Yoko (Feminine)"
@@ -66,7 +66,7 @@ models:
     checkpoint: checkpoints/Ritsu.ckpt
     readme: |
       This model is trained on the Namine Ritsu ENUNU Dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
-      It has a powerful and throaty voice.
     default_speaker: "ritsu"
   - name: "S (Masculine)"
@@ -90,5 +90,13 @@ models:
     checkpoint: checkpoints/Azure.ckpt
     readme: |
       This model is trained on a dataset known as Azure Cobalt and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
-      It has a stable, mature voice.
-    default_speaker: "azure"

     checkpoint: checkpoints/Kiritan.ckpt
     readme: |
       This model is trained on the Tohoku Kiritan dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
+      It has a cute, yet powerful voice. CV: Akaneya Himika
     default_speaker: "kiritan"
   - name: "Tohoku Itako (Feminine)"
     checkpoint: checkpoints/Itako.ckpt
     readme: |
       This model is trained on the Tohoku Itako dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
+      It has a bright and whispery voice. CV: Kido Ibuki
     default_speaker: "itako"
   - name: "No.7 (Feminine)"
     checkpoint: checkpoints/Seven.ckpt
     readme: |
       This model is trained on the No.7 dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
+      It has a strong and sharp voice. CV: Koiwai Kotori
     default_speaker: "seven"
   - name: "Yoko (Feminine)"
     checkpoint: checkpoints/Ritsu.ckpt
     readme: |
       This model is trained on the Namine Ritsu ENUNU Dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
+      It has a powerful and throaty voice. CV: Canon
     default_speaker: "ritsu"
   - name: "S (Masculine)"
     checkpoint: checkpoints/Azure.ckpt
     readme: |
       This model is trained on a dataset known as Azure Cobalt and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
+      It has a stable, mature voice. CV: Aster
+    default_speaker: "azure"
+  - name: "ALYS (Feminine)"
+    config: configs/ALYS.py
+    checkpoint: checkpoints/ALYS.ckpt
+    readme: |
+      This model is trained on the ALYS DB 001 JPN dataset, originally produced by Voxwave and released under the [GPL-3.0](https://choosealicense.com/licenses/gpl-3.0/) license.
+      It has a slightly soft voice. CV: Poucet
+    default_speaker: "ALYS"

configs/ALYS.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from fish_diffusion.datasets.hifisinger import HiFiSVCDataset
+from fish_diffusion.datasets.utils import get_datasets_from_subfolder
+_base_ = [
+    "./_base_/archs/hifi_svc.py",
+    "./_base_/trainers/base.py",
+    "./_base_/schedulers/exponential.py",
+    "./_base_/datasets/hifi_svc.py",
+]
+speaker_mapping = {
+    "ALYS": 0,
+}
+model = dict(
+    type="HiFiSVC",
+    speaker_encoder=dict(
+        input_size=len(speaker_mapping),
+    ),
+)
+preprocessing = dict(
+    text_features_extractor=dict(
+        type="ContentVec",
+    ),
+    pitch_extractor=dict(
+        type="CrepePitchExtractor",
+        keep_zeros=False,
+        f0_min=40.0,
+        f0_max=1600.0,
+    ),
+    energy_extractor=dict(
+        type="RMSEnergyExtractor",
+    ),
+    augmentations=[
+        dict(
+            type="FixedPitchShifting",
+            key_shifts=[-5.0, 5.0],
+            probability=0.75,
+        ),
+    ],
+)
+trainer = dict(
+    # Disable gradient clipping, which is not supported by custom optimization
+    gradient_clip_val=None,
+    max_steps=1000000,
+)