Daniel Tse
committed on
Commit
·
e7956b2
1
Parent(s):
555a0ea
Implement Summary
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ def transcribe_audio(audiofile):
|
|
10 |
st.session_state['audio'] = audiofile
|
11 |
print(f"audio_file_session_state:{st.session_state['audio'] }")
|
12 |
|
|
|
13 |
#get size of audio file
|
14 |
audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
|
15 |
print(f"audio file size:{audio_size}")
|
@@ -20,6 +21,7 @@ def transcribe_audio(audiofile):
|
|
20 |
podcast_duration = podcast.duration_seconds
|
21 |
print(f"Audio Duration: {podcast_duration}")
|
22 |
|
|
|
23 |
whisper_model = whisper.load_model("small.en")
|
24 |
transcription = whisper_model.transcribe(audiofile)
|
25 |
st.session_state['transcription'] = transcription
|
@@ -29,15 +31,9 @@ def transcribe_audio(audiofile):
|
|
29 |
return transcription
|
30 |
|
31 |
def summarize_podcast(audiotranscription):
|
32 |
-
|
33 |
-
summarized_text = sum_pipe(audiotranscription,
|
34 |
-
max_length=1000,
|
35 |
-
min_length=100,
|
36 |
-
do_sample=False,
|
37 |
-
early_stopping=True,
|
38 |
-
num_beams=4)
|
39 |
-
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
40 |
|
|
|
41 |
return summarized_text
|
42 |
|
43 |
|
@@ -55,7 +51,19 @@ st.markdown(
|
|
55 |
|
56 |
st.audio("marketplace-2023-06-14.mp3")
|
57 |
if st.button("Process Audio File"):
|
58 |
-
transcribe_audio("marketplace-2023-06-14.mp3")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
#audio_file = st.file_uploader("Upload audio copy of file", key="upload", type=['.mp3'])
|
61 |
|
|
|
10 |
st.session_state['audio'] = audiofile
|
11 |
print(f"audio_file_session_state:{st.session_state['audio'] }")
|
12 |
|
13 |
+
st.info("Getting size of file")
|
14 |
#get size of audio file
|
15 |
audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
|
16 |
print(f"audio file size:{audio_size}")
|
|
|
21 |
podcast_duration = podcast.duration_seconds
|
22 |
print(f"Audio Duration: {podcast_duration}")
|
23 |
|
24 |
+
st.info("Transcribing")
|
25 |
whisper_model = whisper.load_model("small.en")
|
26 |
transcription = whisper_model.transcribe(audiofile)
|
27 |
st.session_state['transcription'] = transcription
|
|
|
31 |
return transcription
|
32 |
|
33 |
def summarize_podcast(audiotranscription):
|
34 |
+
summarizer = pipeline("summarization", model="philschmid/flan-t5-base-samsum", device=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
summarized_text = summarizer(audiotranscription)
|
37 |
return summarized_text
|
38 |
|
39 |
|
|
|
51 |
|
52 |
st.audio("marketplace-2023-06-14.mp3")
|
53 |
if st.button("Process Audio File"):
|
54 |
+
podcast_text = transcribe_audio("marketplace-2023-06-14.mp3")
|
55 |
+
#write text out
|
56 |
+
st.expander("See Transcription"):
|
57 |
+
st.caption(podcast_text)
|
58 |
+
|
59 |
+
#Summarize Text
|
60 |
+
podcast_summary = summarize_podcast(podcast_text)
|
61 |
+
st.markdown(
|
62 |
+
"""
|
63 |
+
##Summary of Text
|
64 |
+
"""
|
65 |
+
)
|
66 |
+
st.text(podcast_summary)
|
67 |
|
68 |
#audio_file = st.file_uploader("Upload audio copy of file", key="upload", type=['.mp3'])
|
69 |
|