Daniel Tse
committed on
Commit
·
b74cc3f
1
Parent(s):
3277eaa
Import AudioSegment for audio manipulations
Browse files- app.py +6 -2
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -2,18 +2,22 @@ import streamlit as st
|
|
| 2 |
import whisper
|
| 3 |
import os
|
| 4 |
from transformers import pipeline
|
|
|
|
| 5 |
|
| 6 |
def transcribe_audio(audiofile):
    """Remember the given audio file in session state and report its size.

    The value is stored under ``st.session_state['audio']`` and then passed
    to ``os.path.getsize``, so it is presumably a filesystem path (confirm
    against the caller). No transcription happens in this function yet.

    Args:
        audiofile: The audio file reference to store and measure.

    Returns:
        The file size in mebibytes, rounded to one decimal place.
    """
    session = st.session_state
    session['audio'] = audiofile
    print(f"audio_file_session_state:{session['audio'] }")

    # Convert the on-disk size from bytes to MiB, keeping one decimal.
    bytes_per_mib = 1024 * 1024
    size_in_bytes = os.path.getsize(session['audio'])
    audio_size = round(size_in_bytes / bytes_per_mib, 1)
    print(f"audio file size:{audio_size}")

    return audio_size
|
| 18 |
|
| 19 |
st.markdown("# Podcast Q&A")
|
|
|
|
| 2 |
import whisper
|
| 3 |
import os
|
| 4 |
from transformers import pipeline
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
|
| 7 |
def transcribe_audio(audiofile):
    """Store the audio file in session state and return its size in MiB.

    Despite its name, this function does not transcribe anything yet: it
    records ``audiofile`` under ``st.session_state['audio']``, prints it,
    measures the file with ``os.path.getsize`` and prints the result.

    Args:
        audiofile: The audio file reference to store and measure. It is
            handed to ``os.path.getsize``, so presumably a filesystem path
            (confirm against the caller).

    Returns:
        The file size in mebibytes, rounded to one decimal place.

    Raises:
        OSError: Propagated from ``os.path.getsize`` if the file does not
            exist or is inaccessible.
    """
    st.session_state['audio'] = audiofile
    print(f"audio_file_session_state:{st.session_state['audio'] }")

    # Get size of audio file (bytes -> MiB, one decimal place).
    audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
    print(f"audio file size:{audio_size}")

    # TODO: determine audio length of file.
    # TODO: determine if we need to break up file into chunks. The original
    # check here, `if (audio_size > )`, had no threshold, colon or body and
    # was a SyntaxError that prevented the module from importing. Reinstate
    # it once the size limit and the chunking logic are decided.

    return audio_size
|
| 22 |
|
| 23 |
st.markdown("# Podcast Q&A")
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
torch
|
| 2 |
transformers
|
| 3 |
git+https://github.com/openai/whisper.git
|
|
|
|
|
|
| 1 |
torch
|
| 2 |
transformers
|
| 3 |
git+https://github.com/openai/whisper.git
|
| 4 |
+
pydub
|