srinivasmmw2024 committed on
Commit
90d7c67
·
verified ·
1 Parent(s): 9dd7b4a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
+ import soundfile as sf
4
+
5
# Load the processor and model once at import time so every call to
# transcribe_audio() reuses the same objects.
#
# NOTE(review): this previously pointed at "openbmb/MiniCPM-o-2_6". That
# repository hosts a multimodal MiniCPM-o model rather than a Wav2Vec2 CTC
# checkpoint, so it cannot be loaded meaningfully through Wav2Vec2Processor /
# Wav2Vec2ForCTC. The id below is the standard English Wav2Vec2 CTC checkpoint;
# swap in another Wav2Vec2 CTC checkpoint if a different language is needed.
MODEL_ID = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
8
+
9
def transcribe_audio(file_path):
    """Transcribe a single audio file with the module-level CTC model.

    Args:
        file_path: Path to an audio file readable by ``soundfile``.

    Returns:
        The decoded text for the file, or an empty string when the file
        contains no audio frames.

    Note:
        No resampling is done here: the file's native sample rate is passed
        straight to the processor. The audio should therefore already be at
        the rate the checkpoint expects (presumably 16 kHz for typical
        Wav2Vec2 checkpoints -- confirm for the one in use).
    """
    # always_2d=True yields a (frames, channels) array for mono and
    # multi-channel files alike, so the downmix below needs no special case.
    samples, sample_rate = sf.read(file_path, dtype="float32", always_2d=True)

    # Nothing to transcribe; avoid feeding a zero-length signal to the model.
    if samples.shape[0] == 0:
        return ""

    # The processor is given one 1-D waveform: average the channels so stereo
    # input is not misread as a batch or as a malformed signal.
    mono = samples.mean(axis=1)

    # Preprocess the waveform into model-ready tensors.
    input_values = processor(
        mono, sampling_rate=sample_rate, return_tensors="pt"
    ).input_values

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: pick the most likely token per frame, then let the
    # processor turn the id sequence into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)

    # A single waveform was submitted, so the batch has exactly one entry.
    return transcription[0]
25
+
26
if __name__ == "__main__":
    # Script entry point: transcribe the bundled sample file and print the text.
    print("Transcription:", transcribe_audio("CAR0005.mp3"))