Spaces:

EnzoBustos
/

TuringVideos

Sleeping

App Files Files Community

EnzoBustos committed on Dec 1, 2023

Commit

73871bd

1 Parent(s): 446d6c7

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -26

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import yt_dlp
 import os
 import streamlit as st
@@ -14,21 +16,20 @@ st.set_page_config(
     page_title = "Turing Videos",
     page_icon = icon,
     layout = "wide",
-    initial_sidebar_state = "auto",
 )
-#Download youtube video
-#@st.cache_data
 def download_audio(link):
     with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
         video.download(link)
 #Load Whisper pipeline via HuggingFace
 @st.cache_resource
-def load_whisper(seconds):
     return pipeline("automatic-speech-recognition",
                     model="openai/whisper-tiny",
-                    chunk_length_s=seconds,
                    )
 #Load Extractive Summarizer pipeline via HuggingFace
@@ -44,7 +45,7 @@ def load_extractive():
 @st.cache_resource
 def load_qa():
     return pipeline("question-answering",
-                    model='distilbert-base-uncased-distilled-squad'
                    )
 #Download punkt function from nltk
@@ -54,34 +55,34 @@ def load_nltk():
 #Make the ASR task
 @st.cache_data
-def audio_speech_recognition(model_pipeline, video="video.mp3"):
-    return model_pipeline(video, batch_size=8)["text"].strip()
 #Make the Summarization task
 @st.cache_data
-def text_summarization(model_pipeline, full_text, ratio):
     sentences = nltk.sent_tokenize(full_text)
-    extractive_sentences = model_pipeline({"sentences": sentences}, strategy="ratio", strategy_args=ratio)
     extractive_text = " ".join(extractive_sentences[0])
     return extractive_text.strip()
 #Make the QA task
 @st.cache_data
-def answer_questions(model_pipeline, full_text, questionings):
     answers = []
     for question in questionings:
-        result = model_pipeline(question=question, context=full_text)
         answers.append(result["answer"])
     return answers
 def main():
     header = st.container()
     model = st.container()
     model_1, model_2 = st.columns(2)
     with st.sidebar:
         st.title(":red[Turing]Videos")
         with st.form("data_collection"):
@@ -102,20 +103,13 @@ def main():
                                      height=50, placeholder="Digite suas perguntas..."
                                     ).split(",")
-            seconds = st.select_slider(label="Digite a duração do seu vídeo para otimização:",
-                                       options = ["5 min", "15 min", "30 min", "45 min", "60 min"],
-                                       value = "15 min",
-                                      )
-            seconds = int(seconds.replace(" min", "")) * 60
             submitted = st.form_submit_button("Submit")
             if submitted:
                 st.success('Dados coletados!', icon="✅")
             else:
                 st.error('Dados ainda não coletados!', icon="🚨")
     with header:
         st.title(":red[Turing]Videos")
         st.subheader("Este projeto utiliza técnicas de inteligência artificial para simplificar e acelerar a compreensão de conteúdo audiovisual.",
@@ -129,7 +123,7 @@ def main():
                 if language == "Inglês (en)":
                     download_audio(link)
                     load_nltk()
-                    whisper = load_whisper(seconds)
                     extractive = load_extractive()
                     qa_model = load_qa()
@@ -152,7 +146,7 @@ def main():
                 st.header("Resposta das perguntas:")
                 with st.spinner("Carregando respostas..."):
                     answers = answer_questions(qa_model, transcript_text, questions)
                 for i in range(len(answers)):
                     st.subheader(questions[i])
                     st.subheader(answers[i])

+%%writefile app.py
 import yt_dlp
 import os
 import streamlit as st
     page_title = "Turing Videos",
     page_icon = icon,
     layout = "wide",
+    initial_sidebar_state = "auto",
 )
+@st.cache_data
 def download_audio(link):
     # Hand `link` to yt_dlp with options requesting the 'bestaudio' format and
     # 'video.mp3' as the output template; nothing is returned, the downloaded
     # file is the result.
     # NOTE(review): with st.cache_data a repeated call with the same `link`
     # presumably skips this body, so 'video.mp3' may not match `link` if a
     # different link was downloaded in between -- confirm.
     with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
         video.download(link)
 # Build the "automatic-speech-recognition" pipeline for the
 # "openai/whisper-tiny" model with a fixed chunk_length_s of 30.
 # Decorated with st.cache_resource, so the pipeline object is presumably
 # created once and shared across reruns -- confirm against Streamlit docs.
 # NOTE(review): `pipeline` is presumably transformers.pipeline; its import is
 # not visible in this view.
 @st.cache_resource
+def load_whisper():
     return pipeline("automatic-speech-recognition",
                     model="openai/whisper-tiny",
+                    chunk_length_s=30,
                    )
 #Load Extractive Summarizer pipeline via HuggingFace
 @st.cache_resource
 def load_qa():
     # Build and return the "question-answering" pipeline backed by the
     # "rsvp-ai/bertserini-bert-base-squad" model.
     return pipeline("question-answering",
+                    model="rsvp-ai/bertserini-bert-base-squad"
                    )
 #Download punkt function from nltk
 # Run the ASR task: call the given pipeline on `video` (default "video.mp3")
 # with batch_size=64 and return the "text" entry of its result, stripped of
 # surrounding whitespace.
 # NOTE(review): the leading underscore on `_model_pipeline` presumably keeps
 # st.cache_data from hashing that argument -- confirm against Streamlit docs.
 @st.cache_data
+def audio_speech_recognition(_model_pipeline, video="video.mp3"):
+    return _model_pipeline(video, batch_size=64)["text"].strip()
 # Run the summarization task: split `full_text` into sentences with
 # nltk.sent_tokenize, pass them to the given pipeline with strategy="ratio"
 # and strategy_args=ratio, then join element [0] of the result with single
 # spaces and return it stripped.
 # NOTE(review): the leading underscore on `_model_pipeline` presumably keeps
 # st.cache_data from hashing that argument -- confirm against Streamlit docs.
 @st.cache_data
+def text_summarization(_model_pipeline, full_text, ratio):
     sentences = nltk.sent_tokenize(full_text)
+    extractive_sentences = _model_pipeline({"sentences": sentences}, strategy="ratio", strategy_args=ratio)
     extractive_text = " ".join(extractive_sentences[0])
     return extractive_text.strip()
 # Run the QA task: for each entry of `questionings`, call the given pipeline
 # with that question and `full_text` as context, and collect the "answer"
 # field of each result. Returns the answers as a list in question order.
 # NOTE(review): the leading underscore on `_model_pipeline` presumably keeps
 # st.cache_data from hashing that argument -- confirm against Streamlit docs.
 @st.cache_data
+def answer_questions(_model_pipeline, full_text, questionings):
     answers = []
     for question in questionings:
+        result = _model_pipeline(question=question, context=full_text)
         answers.append(result["answer"])
     return answers
 def main():
     header = st.container()
     model = st.container()
     model_1, model_2 = st.columns(2)
     with st.sidebar:
         st.title(":red[Turing]Videos")
         with st.form("data_collection"):
                                      height=50, placeholder="Digite suas perguntas..."
                                     ).split(",")
             submitted = st.form_submit_button("Submit")
             if submitted:
                 st.success('Dados coletados!', icon="✅")
             else:
                 st.error('Dados ainda não coletados!', icon="🚨")
     with header:
         st.title(":red[Turing]Videos")
         st.subheader("Este projeto utiliza técnicas de inteligência artificial para simplificar e acelerar a compreensão de conteúdo audiovisual.",
                 if language == "Inglês (en)":
                     download_audio(link)
                     load_nltk()
+                    whisper = load_whisper()
                     extractive = load_extractive()
                     qa_model = load_qa()
                 st.header("Resposta das perguntas:")
                 with st.spinner("Carregando respostas..."):
                     answers = answer_questions(qa_model, transcript_text, questions)
                 for i in range(len(answers)):
                     st.subheader(questions[i])
                     st.subheader(answers[i])