import yt_dlp
import streamlit as st
from transformers import pipeline, AutoTokenizer
import nltk
from PIL import Image
import torch

icon = Image.open("Traçado laranja #f1863d.png")

st.set_page_config(
    page_title = "Turing Videos",
    page_icon = icon,
    layout = "wide",
    initial_sidebar_state = "auto",
)

#@st.cache_resource
def download_audio(link):
    #Download only the best audio stream and save it as "<video id>.mp3"
    #extract_info(..., download=True) already downloads the file, so no separate download() call is needed
    with yt_dlp.YoutubeDL({'format': 'bestaudio', 'outtmpl': '%(id)s.mp3'}) as video:
        info_dict = video.extract_info(link, download=True)
        return info_dict['id']
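
#Illustrative usage (hypothetical link): download_audio("https://www.youtube.com/watch?v=<video id>")
#returns the video id and leaves "<video id>.mp3" in the working directory for the ASR step below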

#Load Whisper pipeline via HuggingFace
@st.cache_resource
def load_whisper():
    return pipeline("automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    chunk_length_s=30,
                   )

#Load Extractive Summarizer pipeline via HuggingFace
@st.cache_resource
def load_extractive():
    return pipeline("summarization",
               model = "NotXia/longformer-bio-ext-summ",
               tokenizer = AutoTokenizer.from_pretrained("NotXia/longformer-bio-ext-summ"),
               trust_remote_code = True,
               )

#Load QA pipeline via HuggingFace
@st.cache_resource
def load_qa():
    return pipeline("question-answering",
                    model="rsvp-ai/bertserini-bert-base-squad"
                   )

#Download the punkt sentence tokenizer data from nltk
@st.cache_data
def load_nltk():
    nltk.download("punkt")

#Run the automatic speech recognition (ASR) task
@st.cache_data
def audio_speech_recognition(_model_pipeline, video_id):
    return _model_pipeline(video_id + ".mp3", batch_size=64)["text"].strip()

#Run the extractive summarization task
@st.cache_data
def text_summarization(_model_pipeline, full_text, ratio):
    sentences = nltk.sent_tokenize(full_text)
    extractive_sentences = _model_pipeline({"sentences": sentences}, strategy="ratio", strategy_args=ratio)
    extractive_text = " ".join(extractive_sentences[0])
    return extractive_text.strip()

#Run the question-answering (QA) task
@st.cache_data
def answer_questions(_model_pipeline, full_text, questionings):
    answers = []
    for question in questionings:
        result = _model_pipeline(question=question, context=full_text)
        answers.append(result["answer"])
    return answers
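
#Rough shape of how the cached helpers compose (illustrative values only):
#   transcript = audio_speech_recognition(whisper, "<video id>")        -> full transcript string
#   summary    = text_summarization(extractive, transcript, 0.25)       -> selected sentences joined into one string
#   answers    = answer_questions(qa_model, transcript, ["Question?"])  -> one answer span per question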

def main():

    header = st.container()
    model = st.container()
    model_1, model_2 = st.columns(2)

    with st.sidebar:

        st.title(":red[Turing]Videos")

        with st.form("data_collection"):

            language = st.selectbox('Qual a linguagem do seu modelo?',
                                    ('Inglês (en)', 'Português (pt)', 'Outra')
                                   )

            link = st.text_area(label="Coloque o link do seu vídeo do YouTube:",
                                height=25, placeholder="Digite seu link...")

            compression_rate = st.slider(label="Selecione a taxa de compressão:",
                                         min_value=0.05, max_value=0.35,
                                         value=0.25, step=0.05
                                        )

            questions = st.text_area(label="Coloque suas perguntas separadas por vírgula!",
                                     height=50, placeholder="Digite suas perguntas..."
                                    ).split(",")

            submitted = st.form_submit_button("Submit")

            if submitted:
                st.success('Dados coletados!', icon="✅")
            else:
                st.error('Dados ainda não coletados!', icon="🚨")

    with header:
        st.title(":red[Turing]Videos")
        st.subheader("Este projeto utiliza técnicas de inteligência artificial para simplificar e acelerar a compreensão de conteúdo audiovisual.",
                     divider = "red"
                    )

    with model:
        if submitted:
            with st.spinner("Carregando modelos..."):

                if language == "Inglês (en)":
                    video_id = download_audio(link)
                    load_nltk()
                    whisper = load_whisper()
                    extractive = load_extractive()
                    qa_model = load_qa()

                elif language == "Português (pt)":
                    st.header("Modelo ainda não implementado.")
                    st.stop()  #Stop here: the pipelines below are only loaded for English

                else:
                    st.header("Erro na seleção de linguagem.")
                    st.stop()  #Stop here: no models were loaded, so the steps below would fail

            with st.spinner("Transcrevendo texto..."):
                transcript_text = audio_speech_recognition(whisper, video_id)

            with model_1:
                st.header("Texto Sumarizado:")
                with st.spinner("Carregando sumarização..."):
                    summary = text_summarization(extractive, transcript_text, compression_rate)
                st.subheader(summary)

            with model_2:
                st.header("Resposta das perguntas:")
                with st.spinner("Carregando respostas..."):
                    answers = answer_questions(qa_model, transcript_text, questions)

                for question, answer in zip(questions, answers):
                    st.subheader(question)
                    st.subheader(answer)
                    st.write("\n\n")

if __name__ == "__main__":
    main()
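
#To run the app locally (assuming this file is saved as app.py and yt-dlp, streamlit,
#transformers, nltk, Pillow and torch are installed):
#   streamlit run app.py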