Daniel Tse
committed on
Commit
·
e3d61f6
1
Parent(s):
c274bf0
Set max_new_tokens.
Browse files
Change summarization model to use Google
app.py
CHANGED
@@ -28,6 +28,7 @@ def transcribe_audio(audiofile):
|
|
28 |
model="openai/whisper-medium",
|
29 |
chunk_length_s=30,
|
30 |
device=device,
|
|
|
31 |
)
|
32 |
|
33 |
transcription = pipe(audiofile, batch_size=8)["text"]
|
@@ -39,10 +40,16 @@ def transcribe_audio(audiofile):
|
|
39 |
return transcription
|
40 |
|
41 |
def summarize_podcast(audiotranscription):
|
42 |
-
sum_pipe = pipeline("summarization",model="
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
st.markdown("# Podcast Q&A")
|
|
|
28 |
model="openai/whisper-medium",
|
29 |
chunk_length_s=30,
|
30 |
device=device,
|
31 |
+
max_new_tokens=60,
|
32 |
)
|
33 |
|
34 |
transcription = pipe(audiofile, batch_size=8)["text"]
|
|
|
40 |
return transcription
|
41 |
|
42 |
def summarize_podcast(audiotranscription):
|
43 |
+
sum_pipe = pipeline("summarization",model="google/flan-t5-base",clean_up_tokenization_spaces=True)
|
44 |
+
summarized_text = sum_pipe(audiotranscription,
|
45 |
+
max_length=1000,
|
46 |
+
min_length=100,
|
47 |
+
do_sample=False,
|
48 |
+
early_stopping=True,
|
49 |
+
num_beams=4)
|
50 |
+
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
51 |
+
|
52 |
+
return summarized_text
|
53 |
|
54 |
|
55 |
st.markdown("# Podcast Q&A")
|