import yt_dlp
import os
import streamlit as st
from transformers import pipeline, AutoTokenizer
import nltk
from PIL import Image

# Download the audio track of a YouTube video to ./video.mp3
@st.cache_data
def download_audio(link):
    # yt_dlp expects a list of URLs; 'bestaudio' selects an audio-only format.
    with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
        video.download([link])

# Load the Whisper ASR pipeline from Hugging Face; chunk_length_s enables chunked long-form transcription
@st.cache_resource
def load_whisper(seconds):
    return pipeline("automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    chunk_length_s=seconds,
                   )
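
# A minimal usage sketch (not called by the app): download a video's audio and
# transcribe it with the cached Whisper pipeline. It assumes ffmpeg is available
# so the pipeline can decode the file, and the URL below is only a placeholder.
def transcribe_example():
    download_audio("https://www.youtube.com/watch?v=<VIDEO_ID>")  # placeholder URL
    asr = load_whisper(30)           # 30-second chunks for long-form transcription
    return asr("video.mp3")["text"]  # ASR pipelines return a dict with a "text" key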

# Load the extractive summarization pipeline from Hugging Face
# (trust_remote_code runs the custom pipeline code shipped with the model repo)
@st.cache_resource
def load_extractive():
    return pipeline("summarization",
                    model="NotXia/longformer-bio-ext-summ",
                    tokenizer=AutoTokenizer.from_pretrained("NotXia/longformer-bio-ext-summ"),
                    trust_remote_code=True,
                    )
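
# A hedged sketch (not called by the app) of feeding a transcript to the extractive
# summarizer. The custom pipeline expects pre-split sentences; the {"sentences": ...}
# / strategy call pattern below is an assumption and should be checked against the
# NotXia/longformer-bio-ext-summ model card.
def extractive_example(transcript_text, ratio=0.2):
    nltk.download("punkt", quiet=True)               # data for the sentence tokenizer
    sentences = nltk.sent_tokenize(transcript_text)
    summarizer = load_extractive()
    # Assumed interface: keep roughly `ratio` of the sentences.
    return summarizer({"sentences": sentences}, strategy="ratio", strategy_args=ratio)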

def old_funcs():
    # Legacy helpers kept for reference only; the body is a docstring and never runs.
    '''
    def get_transcription_en(mp3_file):
        model = whisper.load_model("tiny.en")
        directory = os.getcwd()
        result = model.transcribe(os.path.join(directory, mp3_file))
        return result["text"]
    
    #def portuguese_sum_pipeline(link):
    #    video_title = download_audio(link)
    
    def english_sum_pipeline(link):
        download_audio(link)
        transcript_text = get_transcription_en("video.mp3")
    
        #extractive summarization
        extractive_model = Summarizer()
        extractive = extractive_model(transcript_text, num_sentences=15)
    
        #abstractive summarization
        device_num = 0 if torch.cuda.is_available() else -1
        abstractive_summarizer = pipeline("summarization", model="facebook/bart-large-cnn", tokenizer="facebook/bart-large-cnn", device=device_num)
        output_text = abstractive_summarizer(extractive)[0]["summary_text"]
    
        return transcript_text, output_text
    
    def english_qa_pipeline(questions, context):
        nlp = pipeline("question-answering", model='distilbert-base-uncased-distilled-squad')
        answers = []
        for question in questions:
            result = nlp(question=question, context=context)
            answers.append(result["answer"])
        return answers
    '''

def main():
    icon = Image.open("Traçado laranja #f1863d.png")

    st.set_page_config(
        page_title="Turing Videos",
        page_icon=icon,
        layout="wide",
        initial_sidebar_state="auto",
    )

    # st.sidebar is used as a context manager directly (it is not callable).
    with st.sidebar:
        st.title(":blue[Turing]Videos")

        with st.form("data_collection"):
            compression_rate = st.slider("Selecione a taxa de compressão:",
                                         min_value=0.1, max_value=0.9,
                                         value=0.2, step=0.05
                                         )

            # Every form must have a submit button.
            submitted = st.form_submit_button("Submit")
            if submitted:
                st.success('Dados coletados!', icon="✅")
            else:
                st.error('Dados ainda não coletados!', icon="🚨")

if __name__ == "__main__":
    main()

def old_main():
    # Legacy interface kept for reference only; the body is a docstring and never runs.
    '''
    #Collect inputs and create the interface
    def main():
        header = st.container()
        model = st.container()
        model_1, model_2 = st.columns(2)
        qa = st.container()
        qa_1, qa_2 = st.columns(2)
    
        with header:
            st.title("TuringVideos")
            st.write("Este trabalho visa a criação de uma interface capaz de sumarizar e responder perguntas sobre um determinado vídeo em português ou inglês!")
    
        with model:
            st.header("Modelo para sumarização")
    
            with model_1:
                language = st.selectbox('Qual a linguagem do seu modelo?', ('Português (pt)', 'Inglês (en)', 'Outra'))
                link = st.text_area(label="Coloque o link do seu vídeo do YouTube!", height=25, placeholder="Digite seu link...")
                questions = st.text_area(label="Coloque suas perguntas separadas por vírgula!", height=50, placeholder="Digite suas perguntas...").split(",")
                submit_1 = st.button('Gerar soluções!')
    
            with model_2:
                if submit_1:
                    with st.spinner('Wait for it...'):
                        if language == 'Português (pt)':
                            #outputs = portuguese_sum_pipeline(link)
                            st.write("Modelo ainda não implementado.")
    
                        elif language == 'Inglês (en)':
                            outputs = english_sum_pipeline(link)
                            answers = english_qa_pipeline(questions, outputs[0])
    
                        else:
                            st.write("Erro na seleção de linguagem.")
    
                        st.write("Sumário.....................................................................: \n {} \n \n".format(outputs[1]))
                        st.write("Resposta....................................................................: \n")
    
                        for i in range(len(answers)):
                            st.write(questions[i] + ": " + answers[i])
    '''
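
# A hedged sketch of how the active main() could wire its inputs to the cached
# loaders above, mirroring the legacy flow kept in old_funcs()/old_main(). It is
# not called anywhere; the summarizer call relies on the assumed interface in
# extractive_example().
def full_pipeline_sketch(link, compression_rate):
    download_audio(link)                                   # writes ./video.mp3
    transcript = load_whisper(30)("video.mp3")["text"]     # transcribe the audio
    return extractive_example(transcript, ratio=compression_rate)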