Commit fcf22f8 · 1 parent 31fcf9c
Update app.py
app.py CHANGED
@@ -1,17 +1,35 @@
 import yt_dlp
-import whisper
 import os
 import streamlit as st
 import transformers
 from transformers import pipeline
-from
-import
+from transformers import AutoTokenizer
+import nltk
 
 #Download youtube video
+@st.cache_data
 def download_audio(link):
     with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
         video.download(link)
 
+#Load Whisper pipeline via HuggingFace
+@st.cache_resource
+def load_whisper(seconds):
+    return pipeline("automatic-speech-recognition",
+                    model="openai/whisper-tiny",
+                    chunk_length_s=seconds,
+    )
+
+#Load Extractive Summarizer pipeline via HuggingFace
+@st.cache_resource
+def load_extractive():
+    return pipeline("summarization",
+                    model = "NotXia/longformer-bio-ext-summ",
+                    tokenizer = AutoTokenizer.from_pretrained("NotXia/longformer-bio-ext-summ"),
+                    trust_remote_code = True,
+    )
+
+'''
 def get_transcription_en(mp3_file):
     model = whisper.load_model("tiny.en")
     directory = os.getcwd()
@@ -43,7 +61,17 @@ def english_qa_pipeline(questions, context):
         result = nlp(question=question, context=context)
         answers.append(result["answer"])
     return answers
+'''
 
+def main():
+    sidebar = st.sidebar()
+
+    with sidebar:
+        st.title(":blue[Turing]Videos")
+
+main()
+
+'''
 #Collect inputs and create the interface
 def main():
     header = st.container()
@@ -85,4 +113,5 @@ def main():
         for i in range(len(answers)):
            st.write(questions[i] + ": " + answers[i])
 
-main()
+main()
+'''