Update README.md
README.md
CHANGED
@@ -42,7 +42,7 @@ The model can generate audio on its own but its better to use a voice to prompt
 - joke
 - emma (the names do not correlate to any tribe or accent)

-
+### Prompt YarnGPT
 ```python
 # clone the YarnGPT repo to get access to the `audiotokenizer`
 !git clone https://github.com/saheedniyi02/yarngpt.git
@@ -115,6 +115,103 @@ IPython.display.Audio(audio,rate=24000)
 torchaudio.save(f"audio.wav", audio, sample_rate=24000)
 ```

+### Simple Nigerian Accented-NewsReader
+```python
+!git clone https://github.com/saheedniyi02/yarngpt.git
+!pip install outetts uroman trafilatura pydub
+
+import os
+import re
+import json
+import torch
+import inflect
+import random
+import requests
+import trafilatura
+import uroman as ur
+import numpy as np
+import torchaudio
+import IPython
+from pydub import AudioSegment
+from pydub.effects import normalize
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from outetts.wav_tokenizer.decoder import WavTokenizer
+
+# download the WavTokenizer config and checkpoint
+!wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
+!wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
+
+from yarngpt.audiotokenizer import AudioTokenizer
+
+tokenizer_path = "saheedniyi/YarnGPT"
+wav_tokenizer_config_path = "/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
+
+audio_tokenizer = AudioTokenizer(
+    tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path
+)
+
+model = AutoModelForCausalLM.from_pretrained(tokenizer_path, torch_dtype="auto").to(audio_tokenizer.device)
+
+def split_text_into_chunks(text, word_limit=25):
+    """
+    Split a long web page into chunks of at most `word_limit` words,
+    inserting "." markers that are later turned into short silences.
+    """
+    sentences = [sentence.strip() for sentence in text.split(".") if sentence.strip()]
+    chunks = []
+    for sentence in sentences:
+        chunks.append(".")
+        sentence_splitted = sentence.split(" ")
+        num_words = len(sentence_splitted)
+        start_index = 0
+        if num_words > word_limit:
+            while start_index < num_words:
+                end_index = min(num_words, start_index + word_limit)
+                chunks.append(" ".join(sentence_splitted[start_index:end_index]))
+                start_index = end_index
+        else:
+            chunks.append(sentence)
+    return chunks
+
+# Extract the content of a web page
+page = requests.get("https://punchng.com/expensive-feud-how-burna-boy-cubana-chief-priests-fight-led-to-dollar-rain/")
+content = trafilatura.extract(page.text)
+chunks = split_text_into_chunks(content)
+
+# Loop over the chunks and build one large `all_codes` list
+all_codes = []
+for i, chunk in enumerate(chunks):
+    print(i)
+    print("\n")
+    print(chunk)
+    if chunk == ".":
+        # add silence for 0.5 seconds if we encounter a full stop
+        all_codes.extend([453] * 20)
+    else:
+        prompt = audio_tokenizer.create_prompt(chunk, "chinenye")
+        input_ids = audio_tokenizer.tokenize_prompt(prompt)
+        output = model.generate(
+            input_ids=input_ids,
+            temperature=0.1,
+            repetition_penalty=1.1,
+            max_length=4000,
+        )
+        codes = audio_tokenizer.get_codes(output)
+        all_codes.extend(codes)
+
+# Convert the collected codes back to audio
+audio = audio_tokenizer.get_audio(all_codes)
+IPython.display.Audio(audio, rate=24000)
+torchaudio.save("news1.wav", audio, sample_rate=24000)
+```
+
 ## Model Description

 - **Developed by:** [Saheedniyi](https://linkedin.com/in/azeez-saheed)
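A note on the `### Prompt YarnGPT` heading added in the first hunk: it labels the existing quick-start block whose opening (`!git clone ...`) and closing (`torchaudio.save(...)`) lines appear here only as diff context. The sketch below reconstructs that single-utterance flow from the same calls used in the news-reader snippet; it assumes `audio_tokenizer` and `model` have already been set up as above, and the input text and the choice of the `emma` voice are illustrative only.

```python
# Minimal single-utterance sketch (assumes the `audio_tokenizer` and `model`
# objects created in the news-reader snippet above).
import torchaudio
import IPython

text = "Good morning, how are you today?"              # illustrative input text
prompt = audio_tokenizer.create_prompt(text, "emma")   # "emma" is one of the listed voices
input_ids = audio_tokenizer.tokenize_prompt(prompt)

output = model.generate(
    input_ids=input_ids,
    temperature=0.1,
    repetition_penalty=1.1,
    max_length=4000,
)

codes = audio_tokenizer.get_codes(output)
audio = audio_tokenizer.get_audio(codes)
IPython.display.Audio(audio, rate=24000)               # listen in a notebook
torchaudio.save("audio.wav", audio, sample_rate=24000) # or save to disk
```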
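The `split_text_into_chunks` helper drives the whole news-reader loop: every `"."` element it emits later becomes roughly half a second of silence, and every other element becomes one `model.generate` call. A quick, illustrative check of its behaviour (the sample text and the small `word_limit` are made up for the demonstration):

```python
sample = "Burna Boy and Cubana Chief Priest traded words online. The feud ended with a dollar rain at a Lagos show."
print(split_text_into_chunks(sample, word_limit=5))
# ['.', 'Burna Boy and Cubana Chief', 'Priest traded words online',
#  '.', 'The feud ended with a', 'dollar rain at a Lagos', 'show']
```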
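The news-reader snippet installs and imports `pydub` (`AudioSegment`, `normalize`) but never calls them. If the intention is to loudness-normalize the saved clip, a sketch along these lines would do it; the `news1_normalized.wav` filename is an illustrative choice, not something taken from the README:

```python
# Optional post-processing sketch: peak-normalize the clip written above.
from pydub import AudioSegment
from pydub.effects import normalize

clip = AudioSegment.from_wav("news1.wav")   # file written by the news-reader snippet
normalized = normalize(clip)                # bring peaks up toward 0 dBFS
normalized.export("news1_normalized.wav", format="wav")
```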