MahmoudAshraf
/

mms-300m-1130-forced-aligner

Automatic Speech Recognition

forced-alignment

Inference Endpoints

Model card Files Files and versions Community

MahmoudAshraf committed on Jun 4, 2024

Commit

e311203

·

verified ·

1 Parent(s): 3bd2831

Update README.md

Files changed (1) hide show

README.md +2 -9

README.md CHANGED Viewed

@@ -176,7 +176,6 @@ pip install git+https://github.com/MahmoudAshraf97/ctc-forced-aligner.git
 ```
 ## Usage
-```python
 from ctc_forced_aligner import (
     load_audio,
     load_alignment_model,
@@ -189,6 +188,7 @@ from ctc_forced_aligner import (
 audio_path = "your/audio/path"
 text_path = "your/text/path"
 audio_waveform = load_audio(audio_path, model.dtype, model.device)
     emissions, stride = generate_emissions(
@@ -202,10 +202,7 @@ text = "".join(line for line in lines).replace("\n", " ").strip()
 alignment_model, alignment_tokenizer, alignment_dictionary = load_alignment_model(
     device,
     dtype=torch.float16 if device == "cuda" else torch.float32,
-    model_path="MahmoudAshraf/mms-300m-1130-forced-aligner"
 )
-# also compatible with other Wav2Vec2 Checkpoints such as
-# "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
 emissions, stride = generate_emissions(
@@ -213,14 +210,10 @@ emissions, stride = generate_emissions(
 )
-# romanization should be enabled when using multilingual models
-# it should be changed to `False` when using models that support the
-# native vocabulary of the text
 tokens_starred, text_starred = preprocess_text(
     text,
     romanize=True,
-    language=langs_to_iso[language],
 )

 ```
 ## Usage
 from ctc_forced_aligner import (
     load_audio,
     load_alignment_model,
 audio_path = "your/audio/path"
 text_path = "your/text/path"
+language = "iso" # ISO-639-3 Language code
 audio_waveform = load_audio(audio_path, model.dtype, model.device)
     emissions, stride = generate_emissions(
 alignment_model, alignment_tokenizer, alignment_dictionary = load_alignment_model(
     device,
     dtype=torch.float16 if device == "cuda" else torch.float32,
 )
 emissions, stride = generate_emissions(
 )
 tokens_starred, text_starred = preprocess_text(
     text,
     romanize=True,
+    language=language,
 )