import yt_dlp
import os
import streamlit as st
from transformers import pipeline, AutoTokenizer
import nltk

# Download the audio track of a YouTube video to video.mp3
@st.cache_data
def download_audio(link):
    with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
        video.download([link])  # yt_dlp expects a list of URLs
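
# Note: 'extract_audio' is not a documented YoutubeDL option key, so the options
# above simply save the raw best-audio stream under the name video.mp3 (ffmpeg,
# used by the Whisper pipeline, can still decode it regardless of the extension).
# A minimal sketch of the documented way to force a real MP3 conversion via the
# FFmpegExtractAudio postprocessor (requires ffmpeg on PATH; the helper name is
# hypothetical and nothing below calls it):
@st.cache_data
def download_audio_as_mp3(link):
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'video.%(ext)s',  # the postprocessor renames the result to video.mp3
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
    }
    with yt_dlp.YoutubeDL(options) as ydl:
        ydl.download([link])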

# Load the Whisper ASR pipeline via Hugging Face
@st.cache_resource
def load_whisper(seconds):
    return pipeline("automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    chunk_length_s=seconds,
                   )

# Load the extractive summarizer pipeline via Hugging Face
@st.cache_resource
def load_extractive():
    return pipeline("summarization",
                    model="NotXia/longformer-bio-ext-summ",
                    tokenizer=AutoTokenizer.from_pretrained("NotXia/longformer-bio-ext-summ"),
                    trust_remote_code=True,
                    )
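
# Minimal sketch (not called anywhere yet) of how the cached pipelines above could
# be combined for an English video. The call format for the NotXia extractive model
# ({"sentences": ...} plus a selection strategy) follows that model's custom
# pipeline as described on its model card and is an assumption here; the helper
# name is hypothetical.
def english_summary_sketch(link, compression_rate=0.2, chunk_seconds=30):
    download_audio(link)

    # Transcribe the downloaded audio with the Whisper ASR pipeline.
    whisper_asr = load_whisper(chunk_seconds)
    transcript = whisper_asr("video.mp3")["text"]

    # Split the transcript into sentences and keep roughly `compression_rate` of them.
    nltk.download("punkt", quiet=True)  # newer NLTK releases may also need "punkt_tab"
    sentences = nltk.sent_tokenize(transcript)
    extractive = load_extractive()
    selected, _indexes = extractive({"sentences": sentences},
                                    strategy="ratio",
                                    strategy_args=compression_rate)
    return transcript, " ".join(selected)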

# Legacy English pipeline kept for reference only; it relies on openai-whisper,
# torch and bert-extractive-summarizer (Summarizer), none of which are imported above.
'''
def get_transcription_en(mp3_file):
    model = whisper.load_model("tiny.en")
    directory = os.getcwd()
    result = model.transcribe(os.path.join(directory, mp3_file))
    return result["text"]

#def portuguese_sum_pipeline(link):
#    video_title = download_audio(link)

def english_sum_pipeline(link):
    download_audio(link)
    transcript_text = get_transcription_en("video.mp3")

    #extractive summarization
    extractive_model = Summarizer()
    extractive = extractive_model(transcript_text, num_sentences=15)

    #abstractive summarization
    device_num = 0 if torch.cuda.is_available() else -1
    abstractive_summarizer = pipeline("summarization", model="facebook/bart-large-cnn", tokenizer="facebook/bart-large-cnn", device=device_num)
    output_text = abstractive_summarizer(extractive)[0]["summary_text"]

    return transcript_text, output_text

def english_qa_pipeline(questions, context):
    nlp = pipeline("question-answering", model='distilbert-base-uncased-distilled-squad')
    answers = []
    for question in questions:
        result = nlp(question=question, context=context)
        answers.append(result["answer"])
    return answers
'''

def main():

    with st.sidebar:  # st.sidebar is a context manager, not a callable
        st.title(":blue[Turing]Videos")

        with st.form("data_collection"):
            compression_rate = st.slider("Selecione a taxa de compressão:",
                                         min_value=0.1, max_value=0.9,
                                         value=0.2, step=0.05
                                        )

            # Every form must have a submit button.
            submitted = st.form_submit_button("Submit")
            if submitted:
                st.success('Dados coletados!', icon="✅")
            else:
                st.error('Dados ainda não coletados!', icon="🚨")

main()

'''
# Previous full interface (summarization + question answering), kept disabled for reference.
# Collect inputs and create the interface
def main():
    header = st.container()
    model = st.container()
    model_1, model_2 = st.columns(2)
    qa = st.container()
    qa_1, qa_2 = st.columns(2)

    with header:
        st.title("TuringVideos")
        st.write("Este trabalho visa a criação de uma interface capaz de sumarizar e responder perguntas sobre um determinado vídeo em português ou inglês!")

    with model:
        st.header("Modelo para sumarização")

        with model_1:
            language = st.selectbox('Qual a linguagem do seu modelo?', ('Português (pt)', 'Inglês (en)', 'Outra'))
            link = st.text_area(label="Coloque o link do seu vídeo do YouTube!", height=25, placeholder="Digite seu link...")
            questions = st.text_area(label="Coloque suas perguntas separadas por vírgula!", height=50, placeholder="Digite suas perguntas...").split(",")
            submit_1 = st.button('Gerar soluções!')

        with model_2:
            if submit_1:
                with st.spinner('Wait for it...'):
                    if language == 'Português (pt)':
                        #outputs = portuguese_sum_pipeline(link)
                        st.write("Modelo ainda não implementado.")

                    elif language == 'Inglês (en)':
                        outputs = english_sum_pipeline(link)
                        answers = english_qa_pipeline(questions, outputs[0])

                        # Display results only inside this branch; the other branches
                        # leave `outputs` and `answers` undefined.
                        st.write("Sumário.....................................................................: \n {} \n \n".format(outputs[1]))
                        st.write("Resposta....................................................................: \n")
                        for i in range(len(answers)):
                            st.write(questions[i] + ": " + answers[i])

                    else:
                        st.write("Erro na seleção de linguagem.")

main()
'''