Update README.md
README.md
CHANGED
@@ -42,7 +42,7 @@ The model can generate audio on its own but its better to use a voice to prompt
 - joke
 - emma (the names do not correlate to any tribe or accent)

-
+### Prompt YarnGPT
 ```python
 # clone the YarnGPT repo to get access to the `audiotokenizer`
 !git clone https://github.com/saheedniyi02/yarngpt.git
@@ -115,6 +115,103 @@ IPython.display.Audio(audio,rate=24000)
 torchaudio.save(f"audio.wav", audio, sample_rate=24000)
 ```

+### Simple Nigerian Accented-NewsReader
+```python
+!git clone https://github.com/saheedniyi02/yarngpt.git
+!pip install outetts uroman trafilatura pydub
+
+import os
+import re
+import json
+import torch
+import inflect
+import random
+import requests
+import trafilatura
+import uroman as ur
+import numpy as np
+import torchaudio
+import IPython
+from pydub import AudioSegment
+from pydub.effects import normalize
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from outetts.wav_tokenizer.decoder import WavTokenizer
+
+# download the WavTokenizer config and checkpoint
+!wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
+!wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
+
+from yarngpt.audiotokenizer import AudioTokenizer
+
+tokenizer_path = "saheedniyi/YarnGPT"
+wav_tokenizer_config_path = "/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
+
+audio_tokenizer = AudioTokenizer(
+    tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path
+)
+
+model = AutoModelForCausalLM.from_pretrained(tokenizer_path, torch_dtype="auto").to(audio_tokenizer.device)
+
+def split_text_into_chunks(text, word_limit=25):
+    """
+    Split a long web page into chunks of at most `word_limit` words,
+    inserting "." markers that are later turned into short silences.
+    """
+    sentences = [sentence.strip() for sentence in text.split(".") if sentence.strip()]
+    chunks = []
+    for sentence in sentences:
+        chunks.append(".")
+        sentence_splitted = sentence.split(" ")
+        num_words = len(sentence_splitted)
+        start_index = 0
+        if num_words > word_limit:
+            while start_index < num_words:
+                end_index = min(num_words, start_index + word_limit)
+                chunks.append(" ".join(sentence_splitted[start_index:end_index]))
+                start_index = end_index
+        else:
+            chunks.append(sentence)
+    return chunks
+
+# Extract the content of a web page
+page = requests.get("https://punchng.com/expensive-feud-how-burna-boy-cubana-chief-priests-fight-led-to-dollar-rain/")
+content = trafilatura.extract(page.text)
+chunks = split_text_into_chunks(content)
+
+# Loop over the chunks and build one large `all_codes` list
+all_codes = []
+for i, chunk in enumerate(chunks):
+    print(i)
+    print("\n")
+    print(chunk)
+    if chunk == ".":
+        # add silence for 0.5 seconds if we encounter a full stop
+        all_codes.extend([453] * 20)
+    else:
+        prompt = audio_tokenizer.create_prompt(chunk, "chinenye")
+        input_ids = audio_tokenizer.tokenize_prompt(prompt)
+        output = model.generate(
+            input_ids=input_ids,
+            temperature=0.1,
+            repetition_penalty=1.1,
+            max_length=4000,
+        )
+        codes = audio_tokenizer.get_codes(output)
+        all_codes.extend(codes)
+
+# Convert the collected codes back to audio
+audio = audio_tokenizer.get_audio(all_codes)
+IPython.display.Audio(audio, rate=24000)
+torchaudio.save("news1.wav", audio, sample_rate=24000)
+```
+
 ## Model Description

 - **Developed by:** [Saheedniyi](https://linkedin.com/in/azeez-saheed)
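A note on the `### Prompt YarnGPT` heading added in the first hunk: it labels the existing quick-start block whose opening (`!git clone ...`) and closing (`torchaudio.save(...)`) lines appear here only as diff context. The sketch below reconstructs that single-utterance flow from the same calls used in the news-reader snippet; it assumes `audio_tokenizer` and `model` have already been set up as above, and the input text and the choice of the `emma` voice are illustrative only.

```python
# Minimal single-utterance sketch (assumes the `audio_tokenizer` and `model`
# objects created in the news-reader snippet above).
import torchaudio
import IPython

text = "Good morning, how are you today?"              # illustrative input text
prompt = audio_tokenizer.create_prompt(text, "emma")   # "emma" is one of the listed voices
input_ids = audio_tokenizer.tokenize_prompt(prompt)

output = model.generate(
    input_ids=input_ids,
    temperature=0.1,
    repetition_penalty=1.1,
    max_length=4000,
)

codes = audio_tokenizer.get_codes(output)
audio = audio_tokenizer.get_audio(codes)
IPython.display.Audio(audio, rate=24000)               # listen in a notebook
torchaudio.save("audio.wav", audio, sample_rate=24000) # or save to disk
```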
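The `split_text_into_chunks` helper drives the whole news-reader loop: every `"."` element it emits later becomes roughly half a second of silence, and every other element becomes one `model.generate` call. A quick, illustrative check of its behaviour (the sample text and the small `word_limit` are made up for the demonstration):

```python
sample = "Burna Boy and Cubana Chief Priest traded words online. The feud ended with a dollar rain at a Lagos show."
print(split_text_into_chunks(sample, word_limit=5))
# ['.', 'Burna Boy and Cubana Chief', 'Priest traded words online',
#  '.', 'The feud ended with a', 'dollar rain at a Lagos', 'show']
```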
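The news-reader snippet installs and imports `pydub` (`AudioSegment`, `normalize`) but never calls them. If the intention is to loudness-normalize the saved clip, a sketch along these lines would do it; the `news1_normalized.wav` filename is an illustrative choice, not something taken from the README:

```python
# Optional post-processing sketch: peak-normalize the clip written above.
from pydub import AudioSegment
from pydub.effects import normalize

clip = AudioSegment.from_wav("news1.wav")   # file written by the news-reader snippet
normalized = normalize(clip)                # bring peaks up toward 0 dBFS
normalized.export("news1_normalized.wav", format="wav")
```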