Spaces:
Sleeping
Sleeping
File size: 3,431 Bytes
a9eb918 8b85024 a9eb918 2cf872d a9eb918 21621d2 2cf872d a9eb918 21621d2 2cf872d a9eb918 0c5f557 a9eb918 0c5f557 a9eb918 47e1ddd a9eb918 0c5f557 a9eb918 0c5f557 28d11de a9eb918 2607f1d 0c5f557 2607f1d 0c5f557 2607f1d a9eb918 28d11de 0c5f557 28d11de a9eb918 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import yt_dlp
import whisper
import os
import streamlit as st
import transformers
from transformers import pipeline
from summarizer import Summarizer
import torch
#Download youtube video
def download_audio(link):
    """Download the best audio track of a YouTube video to ./video.mp3.

    Args:
        link: URL of the YouTube video to download.

    Side effects:
        Writes the audio stream to 'video.mp3' in the current working
        directory (the fixed filename consumed by get_transcription_en).
    """
    ydl_opts = {
        'extract_audio': True,
        'format': 'bestaudio',
        'outtmpl': 'video.mp3',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as video:
        # download() expects a *list* of URLs; passing the bare string made
        # yt-dlp iterate it character by character and fail on every "URL".
        video.download([link])
def get_transcription_en(mp3_file):
    """Transcribe an English-language audio file with Whisper.

    Args:
        mp3_file: Filename of the audio file, relative to the current
            working directory.

    Returns:
        The transcribed text as a single string.
    """
    audio_path = os.path.join(os.getcwd(), mp3_file)
    # "tiny.en" is the smallest English-only Whisper checkpoint.
    whisper_model = whisper.load_model("tiny.en")
    transcription = whisper_model.transcribe(audio_path)
    return transcription["text"]
#def portuguese_sum_pipeline(link):
# video_title = download_audio(link)
def english_sum_pipeline(link, num_sentences=15):
    """Download, transcribe and summarize an English YouTube video.

    Runs a two-stage summarization: an extractive pass (BERT Summarizer)
    picks the most relevant sentences, then an abstractive pass
    (facebook/bart-large-cnn) rewrites them into a fluent summary. The
    extractive stage also keeps the abstractive model's input within its
    token limit.

    Args:
        link: URL of the YouTube video.
        num_sentences: How many sentences the extractive stage keeps
            before abstractive summarization (default 15, matching the
            original behavior).

    Returns:
        A tuple (transcript_text, summary_text).
    """
    download_audio(link)
    transcript_text = get_transcription_en("video.mp3")

    # Extractive summarization: select the top sentences verbatim.
    extractive_model = Summarizer()
    extractive = extractive_model(transcript_text, num_sentences=num_sentences)

    # Abstractive summarization on the extracted sentences; use the GPU
    # when available, otherwise fall back to CPU (device=-1).
    device_num = 0 if torch.cuda.is_available() else -1
    abstractive_summarizer = pipeline("summarization", model="facebook/bart-large-cnn", tokenizer="facebook/bart-large-cnn", device=device_num)
    output_text = abstractive_summarizer(extractive)[0]["summary_text"]

    return transcript_text, output_text
def english_qa_pipeline(questions, context):
    """Answer a list of questions against a reference text.

    Args:
        questions: Iterable of question strings.
        context: The text (e.g. a video transcript) to search for answers.

    Returns:
        A list with one answer string per question, in the same order.
    """
    qa_model = pipeline("question-answering", model='distilbert-base-uncased-distilled-squad')
    return [qa_model(question=q, context=context)["answer"] for q in questions]
#Collect inputs and create the interface
def main():
    """Build the Streamlit UI: collect a video link and questions, run the
    summarization/QA pipelines, and render the results.

    Side effects: renders Streamlit widgets; triggers audio download,
    transcription and model inference when the submit button is pressed.
    """
    header = st.container()
    model = st.container()
    model_1, model_2 = st.columns(2)
    qa = st.container()
    qa_1, qa_2 = st.columns(2)

    with header:
        st.title("TuringVideos")
        st.write("Este trabalho visa a criação de uma interface capaz de sumarizar e responder perguntas sobre um determinado vídeo em português ou inglês!")

    with model:
        st.header("Modelo para sumarização")

    with model_1:
        language = st.selectbox('Qual a linguagem do seu modelo?', ('Português (pt)', 'Inglês (en)', 'Outra'))
        link = st.text_area(label="Coloque o link do seu vídeo do YouTube!", height=25, placeholder="Digite seu link...")
        questions = st.text_area(label="Coloque suas perguntas separadas por vírgula!", height=50, placeholder="Digite suas perguntas...").split(",")
        submit_1 = st.button('Gerar soluções!')

    with model_2:
        if submit_1:
            with st.spinner('Wait for it...'):
                if language == 'Português (pt)':
                    #outputs = portuguese_sum_pipeline(link)
                    st.write("Modelo ainda não implementado.")
                elif language == 'Inglês (en)':
                    outputs = english_sum_pipeline(link)
                    answers = english_qa_pipeline(questions, outputs[0])
                    # Results are rendered only in this branch: the original
                    # wrote `outputs`/`answers` unconditionally after the
                    # if/elif/else, raising NameError whenever a language
                    # other than English was selected.
                    st.write("Sumário.....................................................................: \n {} \n \n".format(outputs[1]))
                    st.write("Resposta....................................................................: \n")
                    for question, answer in zip(questions, answers):
                        st.write(question + ": " + answer)
                else:
                    st.write("Erro na seleção de linguagem.")
main() |