import yt_dlp
import os
import streamlit as st
import transformers
from transformers import pipeline
from transformers import AutoTokenizer
import nltk
# Download the audio track of a YouTube video to "video.mp3" (per outtmpl)
@st.cache_data
def download_audio(link):
    with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
        video.download([link])  # download() expects a list of URLs
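# Example (hypothetical URL) of how download_audio is meant to be called; the
# resulting file lands in the working directory as "video.mp3" per the outtmpl above.
# download_audio("https://www.youtube.com/watch?v=<VIDEO_ID>")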
# Load the Whisper speech-recognition pipeline via Hugging Face
@st.cache_resource
def load_whisper(seconds):
    return pipeline("automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    chunk_length_s=seconds,
                    )
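# Minimal sketch (not wired into the app yet) of how the cached Whisper pipeline
# could transcribe the downloaded file. The 30-second chunk length and the
# "video.mp3" filename are assumptions tied to the helpers above; the Hugging Face
# ASR pipeline accepts an audio file path and returns a dict with a "text" key.
def transcribe_audio(mp3_file="video.mp3", seconds=30):
    whisper_pipeline = load_whisper(seconds)
    result = whisper_pipeline(mp3_file)  # path to the downloaded audio file
    return result["text"]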
# Load the extractive summarization pipeline via Hugging Face
@st.cache_resource
def load_extractive():
    return pipeline("summarization",
                    model="NotXia/longformer-bio-ext-summ",
                    tokenizer=AutoTokenizer.from_pretrained("NotXia/longformer-bio-ext-summ"),
                    trust_remote_code=True,
                    )
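# Hypothetical helper (not used below) sketching how the extractive pipeline might be
# invoked: the NotXia custom pipeline is assumed to take a dict with a "sentences" list
# plus a selection strategy, returning the chosen sentences and their indices. The nltk
# sentence split and the "ratio" strategy are assumptions, not taken from this file.
def extractive_summary(text, ratio=0.2):
    nltk.download("punkt", quiet=True)       # sentence tokenizer data, downloaded once
    sentences = nltk.sent_tokenize(text)
    summarizer = load_extractive()
    selected, indices = summarizer({"sentences": sentences},
                                   strategy="ratio", strategy_args=ratio)
    return " ".join(selected)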
# --- Disabled legacy pipeline (kept for reference): it relies on openai-whisper,
# bert-extractive-summarizer (Summarizer) and torch, which are not imported above. ---
'''
def get_transcription_en(mp3_file):
    model = whisper.load_model("tiny.en")
    directory = os.getcwd()
    result = model.transcribe(os.path.join(directory, mp3_file))
    return result["text"]

#def portuguese_sum_pipeline(link):
#    video_title = download_audio(link)

def english_sum_pipeline(link):
    download_audio(link)
    transcript_text = get_transcription_en("video.mp3")

    # extractive summarization
    extractive_model = Summarizer()
    extractive = extractive_model(transcript_text, num_sentences=15)

    # abstractive summarization
    device_num = 0 if torch.cuda.is_available() else -1
    abstractive_summarizer = pipeline("summarization", model="facebook/bart-large-cnn",
                                      tokenizer="facebook/bart-large-cnn", device=device_num)
    output_text = abstractive_summarizer(extractive)[0]["summary_text"]

    return transcript_text, output_text

def english_qa_pipeline(questions, context):
    nlp = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
    answers = []
    for question in questions:
        result = nlp(question=question, context=context)
        answers.append(result["answer"])
    return answers
'''
def main():
    with st.sidebar:  # st.sidebar is a container used as a context manager, not called
        st.title(":blue[Turing]Videos")

        with st.form("data_collection"):
            compression_rate = st.slider("Selecione a taxa de compressão:",
                                         min_value=0.1, max_value=0.9,
                                         value=0.2, step=0.05
                                         )

            # Every form must have a submit button.
            submitted = st.form_submit_button("Submit")
            if submitted:
                st.success('Dados coletados!', icon="✅")
            else:
                st.error('Dados ainda não coletados!', icon="🚨")

main()
# --- Disabled draft of the full interface (depends on the legacy pipeline above). ---
'''
# Collect inputs and create the interface
def main():
    header = st.container()
    model = st.container()
    model_1, model_2 = st.columns(2)
    qa = st.container()
    qa_1, qa_2 = st.columns(2)

    with header:
        st.title("TuringVideos")
        st.write("Este trabalho visa a criação de uma interface capaz de sumarizar e responder perguntas sobre um determinado vídeo em português ou inglês!")

    with model:
        st.header("Modelo para sumarização")

        with model_1:
            language = st.selectbox('Qual a linguagem do seu modelo?', ('Português (pt)', 'Inglês (en)', 'Outra'))
            link = st.text_area(label="Coloque o link do seu vídeo do YouTube!", height=25, placeholder="Digite seu link...")
            questions = st.text_area(label="Coloque suas perguntas separadas por vírgula!", height=50, placeholder="Digite suas perguntas...").split(",")
            submit_1 = st.button('Gerar soluções!')

        with model_2:
            if submit_1:
                with st.spinner('Wait for it...'):
                    if language == 'Português (pt)':
                        #outputs = portuguese_sum_pipeline(link)
                        st.write("Modelo ainda não implementado.")
                    elif language == 'Inglês (en)':
                        outputs = english_sum_pipeline(link)
                        answers = english_qa_pipeline(questions, outputs[0])

                        # Only the English pipeline defines outputs/answers, so show them here.
                        st.write("Sumário.....................................................................: \n {} \n \n".format(outputs[1]))
                        st.write("Resposta....................................................................: \n")
                        for i in range(len(answers)):
                            st.write(questions[i] + ": " + answers[i])
                    else:
                        st.write("Erro na seleção de linguagem.")

main()
'''