import whisper


class WhisperModel(object):
    def __init__(self, model_type):
        # Load the Whisper model variant requested by the caller
        # (e.g. "tiny", "base", "small", "medium", "large").
        self.model = whisper.load_model(model_type)

    # Transcribe an audio file
    def transcribe_audio(self, file_path):
        try:
            result = self.model.transcribe(file_path)
            return result
        except Exception as e:
            print(f"Error: {e}")
            raise Exception(f"Error transcribing audio file: {e}")

    # Return the full transcript text from a transcription result
    def get_text(self, transcription):
        return transcription['text']

    # Return the language Whisper detected during transcription
    def get_detected_language(self, transcription):
        return transcription['language']

    # Return the timestamped segments as a list of plain dicts
    def get_segments(self, transcription):
        text_segments = []
        for segment in transcription['segments']:
            text_segments.append({
                "text": segment['text'],
                "start": segment['start'],
                "end": segment['end'],
                "id": segment['id'],
            })
        return text_segments

    # Detect the spoken language without running a full transcription
    def detect_language(self, file_path):
        try:
            audio = whisper.load_audio(file_path)
            audio = whisper.pad_or_trim(audio)
            # make log-Mel spectrogram and move to the same device as the model
            mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
            # detect the spoken language
            _, probs = self.model.detect_language(mel)
            detected = max(probs, key=probs.get)
            print(f"Detected language: {detected}")
            return detected
        except Exception as e:
            print(f"Error: {e}")
            raise Exception(f"Error detecting language: {e}")
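

# Minimal usage sketch. "sample.wav" and the "base" model size are
# placeholder assumptions for illustration, not part of the original file.
if __name__ == "__main__":
    model = WhisperModel("base")

    # Quick language check before committing to a full transcription.
    language = model.detect_language("sample.wav")

    # Full transcription, then pull out the text and timestamped segments.
    transcription = model.transcribe_audio("sample.wav")
    print(model.get_text(transcription))
    for segment in model.get_segments(transcription):
        print(f"[{segment['start']:.2f}-{segment['end']:.2f}] {segment['text']}")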