hackergeek98
/

whisper-fa-tinyyy

Automatic Speech Recognition

Generated from Trainer

Model card Files Files and versions Community

hackergeek98 committed 13 days ago

Commit

79adb43

·

verified ·

1 Parent(s): 75f6f4f

Update README.md

Files changed (1) hide show

README.md +72 -1

README.md CHANGED Viewed

@@ -66,4 +66,75 @@ The following hyperparameters were used during training:
 - Transformers 4.49.0
 - Pytorch 2.6.0+cu124
 - Datasets 3.4.1
-- Tokenizers 0.21.1

 - Transformers 4.49.0
 - Pytorch 2.6.0+cu124
 - Datasets 3.4.1
+- Tokenizers 0.21.1
+## How to use the model in Colab
+    # Install required packages
+    !pip install torch torchaudio transformers pydub google-colab
+    import torch
+    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+    from pydub import AudioSegment
+    import os
+    from google.colab import files
+    # Load the model and processor
+    model_id = "hackergeek98/tinyyyy_whisper"
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
+    processor = AutoProcessor.from_pretrained(model_id)
+    # Create pipeline
+    whisper_pipe = pipeline(
+        "automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=0 if torch.cuda.is_available() else -1
+    )
+    # Convert audio to WAV format
+    def convert_to_wav(audio_path):
+        audio = AudioSegment.from_file(audio_path)
+        wav_path = "converted_audio.wav"
+        audio.export(wav_path, format="wav")
+        return wav_path
+    # Split long audio into chunks
+    def split_audio(audio_path, chunk_length_ms=30000):  # Default: 30 sec per chunk
+        audio = AudioSegment.from_wav(audio_path)
+        chunks = [audio[i:i+chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
+        chunk_paths = []
+        for i, chunk in enumerate(chunks):
+            chunk_path = f"chunk_{i}.wav"
+            chunk.export(chunk_path, format="wav")
+            chunk_paths.append(chunk_path)
+        return chunk_paths
+    # Transcribe a long audio file
+    def transcribe_long_audio(audio_path):
+        wav_path = convert_to_wav(audio_path)
+        chunk_paths = split_audio(wav_path)
+        transcription = ""
+        for chunk in chunk_paths:
+            result = whisper_pipe(chunk)
+            transcription += result["text"] + "\n"
+            os.remove(chunk)  # Remove processed chunk
+        os.remove(wav_path)  # Cleanup original file
+        # Save transcription to a text file
+        text_path = "transcription.txt"
+        with open(text_path, "w") as f:
+            f.write(transcription)
+        return text_path
+    # Upload and process audio in Colab
+    uploaded = files.upload()
+    audio_file = list(uploaded.keys())[0]
+    transcription_file = transcribe_long_audio(audio_file)
+    # Download the transcription file
+    files.download(transcription_file)