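"""Streamlit app for the Turing Videos project.

Given a YouTube link, the app downloads the audio with yt-dlp, transcribes
it with Whisper, produces an extractive summary of the transcript, and
answers user-supplied questions about it.
"""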
import yt_dlp
import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer
import nltk
from PIL import Image
icon = Image.open("Traçado laranja #f1863d.png")

st.set_page_config(
    page_title="Turing Videos",
    page_icon=icon,
    layout="wide",
    initial_sidebar_state="auto",
)
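# Note: st.set_page_config must be the first Streamlit call in the script,
# which is why it runs here at import time, before any model loading.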
#@st.cache_resource
def download_audio(link):
    options = {'extract_audio': True, 'format': 'bestaudio', 'outtmpl': '%(id)s.mp3'}
    with yt_dlp.YoutubeDL(options) as video:
        # extract_info with download=True both fetches the metadata and
        # downloads the audio, so no separate download() call is needed
        # (YoutubeDL.download expects a list of URLs, not a single string)
        info_dict = video.extract_info(link, download=True)
        return info_dict['id']
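# Note: with 'format': 'bestaudio' and no audio postprocessor configured,
# yt-dlp saves the original audio stream (often webm/m4a) under the .mp3
# name; the Whisper pipeline decodes it through ffmpeg regardless of the
# file extension, so this still works downstream.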
# Load Whisper pipeline via Hugging Face
@st.cache_resource
def load_whisper():
    return pipeline("automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    chunk_length_s=30,
                    )
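# chunk_length_s=30 makes the pipeline split long audio into 30-second
# chunks, so videos longer than Whisper's native input window can still be
# transcribed end to end.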
# Load Extractive Summarizer pipeline via Hugging Face
@st.cache_resource
def load_extractive():
    return pipeline("summarization",
                    model="NotXia/longformer-bio-ext-summ",
                    tokenizer=AutoTokenizer.from_pretrained("NotXia/longformer-bio-ext-summ"),
                    trust_remote_code=True,
                    )
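# trust_remote_code=True runs the custom pipeline code shipped with the
# model repository; as used below, this model takes {"sentences": [...]} as
# input and selects a subset of those sentences rather than generating new text.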
# Load QA pipeline via Hugging Face
@st.cache_resource
def load_qa():
    return pipeline("question-answering",
                    model="rsvp-ai/bertserini-bert-base-squad"
                    )
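# The QA pipeline extracts an answer span from the transcript; its result
# also carries a "score" field that could be used to filter out
# low-confidence answers if needed.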
# Download the punkt sentence tokenizer from nltk
@st.cache_data
def load_nltk():
    nltk.download("punkt")
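# Note: recent NLTK releases may also require the "punkt_tab" resource for
# nltk.sent_tokenize; add nltk.download("punkt_tab") if tokenization fails.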
# Run the ASR task
@st.cache_data
def audio_speech_recognition(_model_pipeline, video_id):
    return _model_pipeline(video_id + ".mp3", batch_size=64)["text"].strip()
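# The leading underscore in _model_pipeline tells st.cache_data not to hash
# that argument (pipeline objects are unhashable), so only video_id keys the cache.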
# Run the summarization task
@st.cache_data
def text_summarization(_model_pipeline, full_text, ratio):
    # Split the transcript into sentences and let the extractive model
    # pick the most relevant ones
    sentences = nltk.sent_tokenize(full_text)
    extractive_sentences = _model_pipeline({"sentences": sentences}, strategy="ratio", strategy_args=ratio)
    extractive_text = " ".join(extractive_sentences[0])
    return extractive_text.strip()
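# With strategy="ratio", strategy_args appears to be the fraction of
# sentences to keep, so a 0.25 compression rate should retain roughly a
# quarter of the transcript; [0] takes the selected sentences from the
# pipeline's output.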
# Run the QA task
@st.cache_data
def answer_questions(_model_pipeline, full_text, questionings):
    # Answer each question independently against the full transcript
    answers = []
    for question in questionings:
        result = _model_pipeline(question=question, context=full_text)
        answers.append(result["answer"])
    return answers
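# Each question triggers a separate forward pass; the question-answering
# pipeline also accepts a list of questions with a shared context, which
# may be faster for long question lists.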
def main():
    header = st.container()
    model = st.container()
    model_1, model_2 = st.columns(2)

    with st.sidebar:
        st.title(":red[Turing]Videos")

        with st.form("data_collection"):
            language = st.selectbox('What is the language of your video?',
                                    ('English (en)', 'Portuguese (pt)', 'Other')
                                    )

            link = st.text_area(label="Paste the link to your YouTube video:",
                                height=25, placeholder="Type your link...")

            compression_rate = st.slider(label="Select the compression rate:",
                                         min_value=0.05, max_value=0.35,
                                         value=0.25, step=0.05
                                         )

            questions_raw = st.text_area(label="Enter your questions separated by commas!",
                                         height=50, placeholder="Type your questions..."
                                         )
            # Strip surrounding whitespace so questions display cleanly later
            questions = [q.strip() for q in questions_raw.split(",")]

            submitted = st.form_submit_button("Submit")

            if submitted:
                st.success('Data collected!', icon="✅")
            else:
                st.error('Data not collected yet!', icon="🚨")

    with header:
        st.title(":red[Turing]Videos")
        st.subheader("This project uses artificial intelligence techniques to simplify and speed up the understanding of audiovisual content.",
                     divider="red"
                     )
    with model:
        if submitted:
            with st.spinner("Loading models..."):
                if language == "English (en)":
                    video_id = download_audio(link)
                    load_nltk()
                    whisper = load_whisper()
                    extractive = load_extractive()
                    qa_model = load_qa()
                elif language == "Portuguese (pt)":
                    st.header("Model not implemented yet.")
                    # Stop here so the code below never runs with unloaded models
                    st.stop()
                else:
                    st.header("Language selection error.")
                    st.stop()

            with st.spinner("Transcribing text..."):
                transcript_text = audio_speech_recognition(whisper, video_id)

            with model_1:
                st.header("Summarized text:")
                with st.spinner("Loading summarization..."):
                    summary = text_summarization(extractive, transcript_text, compression_rate)
                st.subheader(summary)

            with model_2:
                st.header("Answers to the questions:")
                with st.spinner("Loading answers..."):
                    answers = answer_questions(qa_model, transcript_text, questions)
                for question, answer in zip(questions, answers):
                    st.subheader(question)
                    st.subheader(answer)
                    st.write("\n\n")
if __name__ == "__main__":
    main()
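# Launch locally with: streamlit run app.py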