Daniel Tse
committed on
Commit
·
555a0ea
1
Parent(s):
752daec
Use simpler method for transcription.
Browse files
app.py
CHANGED
@@ -20,21 +20,10 @@ def transcribe_audio(audiofile):
|
|
20 |
podcast_duration = podcast.duration_seconds
|
21 |
print(f"Audio Duration: {podcast_duration}")
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
pipe = pipeline(
|
27 |
-
"automatic-speech-recognition",
|
28 |
-
model="openai/whisper-small.en",
|
29 |
-
chunk_length_s=30,
|
30 |
-
device=device,
|
31 |
-
max_new_tokens=60,
|
32 |
-
)
|
33 |
-
|
34 |
-
transcription = pipe(audiofile, batch_size=8)["text"]
|
35 |
-
|
36 |
st.session_state['transcription'] = transcription
|
37 |
-
print(f"
|
38 |
st.info('Done Transcription')
|
39 |
|
40 |
return transcription
|
|
|
20 |
podcast_duration = podcast.duration_seconds
|
21 |
print(f"Audio Duration: {podcast_duration}")
|
22 |
|
23 |
+
whisper_model = whisper.load_model("small.en")
|
24 |
+
transcription = whisper_model.transcribe(audiofile)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
st.session_state['transcription'] = transcription
|
26 |
+
print(f"ranscription: {transcription['text']}")
|
27 |
st.info('Done Transcription')
|
28 |
|
29 |
return transcription
|