SujithPulikodan committed a9b475d (verified; parent: a07ace6)

Update README.md

Files changed (1): README.md (+59 −3)
README.md CHANGED
---
license: mit
datasets:
- ARTPARK-IISc/Vaani
language:
- kn
base_model:
- openai/whisper-medium
pipeline_tag: automatic-speech-recognition
---
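
This is a Whisper `medium` model fine-tuned for Kannada automatic speech recognition on the ARTPARK-IISc/Vaani dataset.

## Usage

The following snippet loads the model and transcribes a local audio file with 🤗 Transformers: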
```python
import torch
import soundfile as sf
from transformers import (
    WhisperFeatureExtractor,
    WhisperForConditionalGeneration,
    WhisperProcessor,
    WhisperTokenizer,
)

model_id = "ARTPARK-IISc/whisper-small-vaani-medium"

# Load the feature extractor from this repository and the tokenizer from the base model
feature_extractor = WhisperFeatureExtractor.from_pretrained(model_id)
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-medium", language="Kannada", task="transcribe")

# Assemble the processor from the two components
processor = WhisperProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)

# Load the model
device = "cuda" if torch.cuda.is_available() else "cpu"
model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)

# Load the audio file
audio_file_path = "Sample_Audio.wav"  # replace with your audio file path
audio_data, sample_rate = sf.read(audio_file_path)

# Mix stereo down to mono; the feature extractor expects a 1-D waveform
if audio_data.ndim > 1:
    audio_data = audio_data.mean(axis=1)

# Whisper expects 16 kHz audio; resample if needed
if sample_rate != 16000:
    import torchaudio

    resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
    audio_data = resampler(torch.tensor(audio_data, dtype=torch.float32).unsqueeze(0)).squeeze().numpy()

# Convert the waveform to log-Mel input features
input_features = processor(audio_data, sampling_rate=16000, return_tensors="pt").input_features.to(device)

# Generate the transcription (no gradients needed at inference)
with torch.no_grad():
    predicted_ids = model.generate(input_features)

# Decode the predicted token IDs into text
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
print(transcription)
```
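
For quick experiments, the same steps can be wrapped in the high-level `pipeline` API. This is a minimal sketch rather than part of the original card; like the snippet above, it assumes the tokenizer comes from the base model:

```python
import torch
from transformers import pipeline

# Build an ASR pipeline; chunk_length_s splits recordings longer than 30 s into windows
asr = pipeline(
    "automatic-speech-recognition",
    model="ARTPARK-IISc/whisper-small-vaani-medium",
    tokenizer="openai/whisper-medium",  # assumption: tokenizer taken from the base model, as above
    chunk_length_s=30,
    device=0 if torch.cuda.is_available() else -1,
)

# Force Kannada transcription, mirroring the tokenizer settings in the snippet above
result = asr("Sample_Audio.wav", generate_kwargs={"language": "kannada", "task": "transcribe"})
print(result["text"])
```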