hackergeek98 committed on
Commit
79adb43
·
verified ·
1 Parent(s): 75f6f4f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +72 -1
README.md CHANGED
@@ -66,4 +66,75 @@ The following hyperparameters were used during training:
66
  - Transformers 4.49.0
67
  - Pytorch 2.6.0+cu124
68
  - Datasets 3.4.1
69
- - Tokenizers 0.21.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  - Transformers 4.49.0
67
  - Pytorch 2.6.0+cu124
68
  - Datasets 3.4.1
69
+ - Tokenizers 0.21.1
70
+
71
+ ## How to use the model in Colab
72
+
73
+ # Install required packages
74
+ !pip install torch torchaudio transformers pydub google-colab
75
+
76
+ import torch
77
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
78
+ from pydub import AudioSegment
79
+ import os
80
+ from google.colab import files
81
+
82
# Load the model and processor
model_id = "hackergeek98/tinyyyy_whisper"
# Single source of truth for device placement; reused by the pipeline below
# so the model and the pipeline can never disagree about where to run.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
processor = AutoProcessor.from_pretrained(model_id)

# Create pipeline
whisper_pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=device,
)
93
+
94
# Convert audio to WAV format
def convert_to_wav(audio_path, wav_path="converted_audio.wav"):
    """Re-encode an audio file as WAV and return the path of the new file.

    Args:
        audio_path: Path of the input audio file; it is loaded with
            ``AudioSegment.from_file``.
        wav_path: Where the WAV copy is written. Defaults to
            ``"converted_audio.wav"`` in the current working directory.

    Returns:
        ``wav_path``, the path of the exported WAV file.

    Note:
        An existing file at ``wav_path`` is overwritten, so do not pass an
        input file that has the same path as ``wav_path``.
    """
    audio = AudioSegment.from_file(audio_path)
    audio.export(wav_path, format="wav")
    return wav_path
100
+
101
# Split long audio into chunks
def split_audio(audio_path, chunk_length_ms=30000):  # Default: 30 sec per chunk
    """Split a WAV file into consecutive fixed-length chunk files.

    Chunks are written to the current working directory as ``chunk_0.wav``,
    ``chunk_1.wav``, ... in playback order. The last chunk may be shorter
    than ``chunk_length_ms``.

    Args:
        audio_path: Path of the WAV file to split.
        chunk_length_ms: Length of each chunk in milliseconds; must be
            positive.

    Returns:
        The list of chunk file paths, in order.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    # Validate first: a step of 0 makes range() fail with an unrelated
    # message, and a negative step would silently produce no chunks at all.
    if chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be a positive number of milliseconds")

    audio = AudioSegment.from_wav(audio_path)
    chunk_paths = []

    # Export each slice as soon as it is cut instead of first collecting
    # every slice in a list.
    for index, start_ms in enumerate(range(0, len(audio), chunk_length_ms)):
        chunk_path = f"chunk_{index}.wav"
        audio[start_ms:start_ms + chunk_length_ms].export(chunk_path, format="wav")
        chunk_paths.append(chunk_path)

    return chunk_paths
113
+
114
# Transcribe a long audio file
def transcribe_long_audio(audio_path, text_path="transcription.txt"):
    """Transcribe an audio file chunk by chunk and save the text to a file.

    The input is converted to WAV with ``convert_to_wav``, split with
    ``split_audio``, and each chunk is passed to ``whisper_pipe``. The
    transcript contains one line per chunk, each terminated by a newline.

    Args:
        audio_path: Path of the audio file to transcribe.
        text_path: Where the transcript is written. Defaults to
            ``"transcription.txt"``.

    Returns:
        ``text_path``, the path of the written transcript.
    """
    wav_path = convert_to_wav(audio_path)
    chunk_paths = []

    try:
        chunk_paths = split_audio(wav_path)
        lines = [whisper_pipe(chunk_path)["text"] + "\n" for chunk_path in chunk_paths]
    finally:
        # Remove the temporary WAV and chunk files even when transcription
        # fails part-way, so a crash does not leave stale audio behind.
        for temp_path in [*chunk_paths, wav_path]:
            if os.path.exists(temp_path):
                os.remove(temp_path)

    # Save transcription to a text file. UTF-8 is set explicitly so that
    # non-ASCII transcripts do not depend on the platform default encoding.
    with open(text_path, "w", encoding="utf-8") as f:
        f.write("".join(lines))

    return text_path
133
+
134
# Upload and process audio in Colab
uploaded = files.upload()
if not uploaded:
    # Fail with a clear message rather than an IndexError when the upload
    # dialog is dismissed without choosing a file.
    raise RuntimeError("No audio file was uploaded; nothing to transcribe.")

# Only the first uploaded file is transcribed.
audio_file = next(iter(uploaded))
transcription_file = transcribe_long_audio(audio_file)

# Download the transcription file
files.download(transcription_file)