Spaces:
Sleeping
Sleeping
Commit
·
fcf22f8
1
Parent(s):
31fcf9c
Update app.py
Browse files
app.py
CHANGED
@@ -1,17 +1,35 @@
|
|
1 |
import yt_dlp
|
2 |
-
import whisper
|
3 |
import os
|
4 |
import streamlit as st
|
5 |
import transformers
|
6 |
from transformers import pipeline
|
7 |
-
from
|
8 |
-
import
|
9 |
|
10 |
# Download the audio stream of a YouTube video
|
|
|
11 |
def download_audio(link):
    """Download the best available audio stream for ``link`` with yt-dlp.

    The output filename template is fixed to ``video.mp3``. Nothing is
    returned; the only effect is the download itself.
    """
    # NOTE(review): 'extract_audio' is passed straight to YoutubeDL; confirm
    # the Python API honours it, otherwise the saved file may not be real MP3.
    ydl_options = {
        'extract_audio': True,
        'format': 'bestaudio',
        'outtmpl': 'video.mp3',
    }
    with yt_dlp.YoutubeDL(ydl_options) as downloader:
        downloader.download(link)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def get_transcription_en(mp3_file):
|
16 |
model = whisper.load_model("tiny.en")
|
17 |
directory = os.getcwd()
|
@@ -43,7 +61,17 @@ def english_qa_pipeline(questions, context):
|
|
43 |
result = nlp(question=question, context=context)
|
44 |
answers.append(result["answer"])
|
45 |
return answers
|
|
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
#Collect inputs and create the interface
|
48 |
def main():
|
49 |
header = st.container()
|
@@ -85,4 +113,5 @@ def main():
|
|
85 |
for i in range(len(answers)):
|
86 |
st.write(questions[i] + ": " + answers[i])
|
87 |
|
88 |
-
main()
|
|
|
|
1 |
import yt_dlp
|
|
|
2 |
import os
|
3 |
import streamlit as st
|
4 |
import transformers
|
5 |
from transformers import pipeline
|
6 |
+
from transformers import AutoTokenizer
|
7 |
+
import nltk
|
8 |
|
9 |
# Download the audio stream of a YouTube video
|
10 |
+
@st.cache_data
def download_audio(link):
    """Download the best available audio stream for ``link`` with yt-dlp.

    The output filename template is fixed to ``video.mp3``. Nothing is
    returned; the only effect is the download itself. The function is
    wrapped in ``st.cache_data``.
    """
    # NOTE(review): 'extract_audio' is passed straight to YoutubeDL; confirm
    # the Python API honours it, otherwise the saved file may not be real MP3.
    ydl_options = {
        'extract_audio': True,
        'format': 'bestaudio',
        'outtmpl': 'video.mp3',
    }
    with yt_dlp.YoutubeDL(ydl_options) as downloader:
        downloader.download(link)
|
14 |
|
15 |
+
#Load Whisper pipeline via HuggingFace
|
16 |
+
@st.cache_resource
def load_whisper(seconds):
    """Build the HuggingFace speech-recognition pipeline for Whisper.

    Uses the ``openai/whisper-tiny`` checkpoint. ``seconds`` is forwarded
    unchanged as the pipeline's ``chunk_length_s`` argument. The function is
    wrapped in ``st.cache_resource``.
    """
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
        chunk_length_s=seconds,
    )
    return asr_pipeline
|
22 |
+
|
23 |
+
#Load Extractive Summarizer pipeline via HuggingFace
|
24 |
+
@st.cache_resource
def load_extractive():
    """Build the HuggingFace summarization pipeline for extractive summaries.

    Both the model and its tokenizer come from the
    ``NotXia/longformer-bio-ext-summ`` checkpoint, and the pipeline is created
    with ``trust_remote_code=True``. The function is wrapped in
    ``st.cache_resource``.
    """
    checkpoint = "NotXia/longformer-bio-ext-summ"
    # The tokenizer is loaded explicitly and handed to the pipeline.
    summ_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return pipeline(
        "summarization",
        model=checkpoint,
        tokenizer=summ_tokenizer,
        trust_remote_code=True,
    )
|
31 |
+
|
32 |
+
'''
|
33 |
def get_transcription_en(mp3_file):
|
34 |
model = whisper.load_model("tiny.en")
|
35 |
directory = os.getcwd()
|
|
|
61 |
result = nlp(question=question, context=context)
|
62 |
answers.append(result["answer"])
|
63 |
return answers
|
64 |
+
'''
|
65 |
|
66 |
+
def main():
    """Render the app's sidebar, which currently holds only the title."""
    # st.sidebar is an existing container object, not a factory function:
    # calling it (`st.sidebar()`) raises TypeError. Use it directly as the
    # context manager instead.
    with st.sidebar:
        st.title(":blue[Turing]Videos")
|
71 |
+
|
72 |
+
main()
|
73 |
+
|
74 |
+
'''
|
75 |
#Collect inputs and create the interface
|
76 |
def main():
|
77 |
header = st.container()
|
|
|
113 |
for i in range(len(answers)):
|
114 |
st.write(questions[i] + ": " + answers[i])
|
115 |
|
116 |
+
main()
|
117 |
+
'''
|