# Source: Hugging Face Space by aakash0563 — app.py (commit f9a35d7, verified, 3.08 kB)
import pandas as pd
import numpy as np
import torch
from transformers import pipeline
import gradio as gr
import os
from youtube_transcript_api import YouTubeTranscriptApi
# Pretrained BART (CNN/DailyMail) summarization pipeline, loaded once at import
# time — downloads the model weights on the first run.
summarizer_bart = pipeline("summarization", model="facebook/bart-large-cnn")
def summarize(full_txt, min_summ_len=30):
    """Hierarchically summarize *full_txt* with BART.

    The text is split into overlapping word chunks, each chunk is
    summarized, and the concatenated chunk summaries are summarized again
    (with one more chunked pass if they are still longer than a chunk) to
    yield a single short summary.

    Args:
        full_txt: Raw text to summarize.
        min_summ_len: Minimum token length of the final summary
            (coerced with ``int()``, matching the Gradio number input).

    Returns:
        The final summary string.
    """
    chunk_len = 750  # words per chunk — keeps each request under BART's input limit
    overlap = 50     # words shared between consecutive chunks for context

    words = full_txt.split(" ")
    chunk_summaries = _summarize_chunks(words, chunk_len, overlap)
    print(chunk_summaries)

    large_summ = " ".join(chunk_summaries)
    large_words = large_summ.split(" ")
    if len(large_words) < chunk_len:
        # Combined chunk summaries fit in one request: single final pass.
        return summarizer_bart(
            large_summ, max_length=150, min_length=int(min_summ_len), do_sample=False
        )[0]['summary_text']

    # Still too long: one more chunked reduction, then the final pass.
    second_pass = _summarize_chunks(large_words, chunk_len, overlap)
    large_summ = " ".join(second_pass)
    return summarizer_bart(
        large_summ, max_length=100, min_length=int(min_summ_len), do_sample=False
    )[0]['summary_text']


def _summarize_chunks(words, chunk_len, overlap):
    """Summarize overlapping ``chunk_len``-word windows of *words*.

    Returns the list of per-chunk summary strings. Both passes use
    ``summarizer_bart`` — the original first pass called ``summarizer_ft``,
    a name that is never defined, so any input raised NameError.
    """
    summaries = []
    pointer = 0
    while pointer < len(words):
        if pointer + chunk_len < len(words):
            txt = " ".join(words[pointer:pointer + chunk_len])
            pointer += chunk_len - overlap
            max_len, min_len = 130, 40
        else:
            # Tail chunk: clamp so max_length >= min_length >= 1. The
            # original passed the raw remaining word count as max_length,
            # which could fall below min_length=40 for short tails.
            txt = " ".join(words[pointer:])
            max_len = max(len(words) - pointer, 1)
            min_len = min(40, max_len)
            pointer = len(words)
        t = summarizer_bart(
            txt, max_length=max_len, min_length=min_len, do_sample=False
        )[0]['summary_text']
        print(t)
        summaries.append(t)
    return summaries
def extract_text(youtube_video_url, min_summ_len):
    """Fetch a YouTube video's transcript and return its summary.

    Args:
        youtube_video_url: Watch URL of the form ``...watch?v=<id>`` —
            the video id is taken as the text after the first ``=``
            (NOTE(review): breaks for short ``youtu.be/...`` links or
            URLs with extra query parameters; confirm expected inputs).
        min_summ_len: Minimum summary length, forwarded to ``summarize``.

    Returns:
        The summary string.

    Raises:
        IndexError: if the URL contains no ``=``.
        Whatever ``YouTubeTranscriptApi.get_transcript`` raises (e.g.
        transcripts disabled, video unavailable). The original
        ``except Exception as e: raise e`` was a no-op re-raise and is
        removed so the full traceback is preserved.
    """
    video_id = youtube_video_url.split("=")[1]
    transcript_entries = YouTubeTranscriptApi.get_transcript(video_id)
    # Join once instead of quadratic `+=` concatenation in a loop.
    transcript = " ".join(entry["text"] for entry in transcript_entries)
    print(transcript)
    return summarize(transcript, min_summ_len)
# Gradio UI: wires extract_text(url, min_summ_len) to a text box + number box.
demo = gr.Interface(
fn=extract_text,
inputs=["text","number"], # YouTube URL (text) first, then minimum summary length (number)
outputs="text",
title="YouTube Video Text Summarization for Efficient Information Capture",
description="Generate concise summaries of your YouTube Video Text tailored to your specific needs.",
)
# debug=True surfaces server-side errors in the console while developing.
demo.launch(debug=True)