# app.py — Hugging Face Space by aakash0563 (revision 2a57676, 3.51 kB)
import pandas as pd
import numpy as np
import torch
from transformers import pipeline
import gradio as gr
import os
from youtube_transcript_api import YouTubeTranscriptApi
# Stage-1 summarizer: model fine-tuned for meeting/transcript summarization;
# used below to summarize each overlapping chunk of the raw transcript.
summarizer_ft = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
# Stage-2 summarizer: general-purpose BART (CNN/DailyMail); used to condense
# the concatenated chunk summaries into the final summary.
summarizer_bart = pipeline("summarization", model="facebook/bart-large-cnn")
def summarize(full_txt, min_summ_len=30):
    """Hierarchically summarize a long text with two summarization models.

    The text is split into overlapping word chunks; each chunk is summarized
    with the meeting-summary model, and the concatenated chunk summaries are
    condensed with BART.  If that intermediate summary is itself longer than
    one chunk, an extra chunked BART pass is applied before the final one.

    Args:
        full_txt: Input text; chunking is done on whitespace-split words.
        min_summ_len: Minimum token length of the final summary.

    Returns:
        The final summary string ("" for empty/whitespace-only input).
    """
    chunk_len = 750  # words per model chunk
    overlap = 50     # words shared between consecutive chunks for context

    def _chunk_summaries(words, model):
        """Summarize overlapping `chunk_len`-word windows of `words` with `model`."""
        summaries = []
        pointer = 0
        while pointer < len(words):
            if pointer + chunk_len < len(words):
                chunk = " ".join(words[pointer:pointer + chunk_len])
                summaries.append(
                    model(chunk, max_length=130, min_length=40,
                          do_sample=False)[0]['summary_text'])
                # Step back by `overlap` so adjacent chunks share context.
                pointer += chunk_len - overlap
            else:
                tail_len = len(words) - pointer
                chunk = " ".join(words[pointer:])
                # BUG FIX: the original passed max_length=tail_len unclamped,
                # which is invalid whenever the tail is shorter than the
                # min_length of 40 — clamp both bounds so max > min always.
                summaries.append(
                    model(chunk,
                          max_length=max(tail_len, 41),
                          min_length=min(40, tail_len),
                          do_sample=False)[0]['summary_text'])
                pointer = len(words)
        return summaries

    # Robustness: the original fed an empty string straight into the model
    # with max_length=1 / min_length=40 (invalid); short-circuit instead.
    if not full_txt.strip():
        return ""

    # Stage 1: per-chunk summaries of the raw text with the meeting model.
    large_summ = " ".join(_chunk_summaries(full_txt.split(" "), summarizer_ft))

    # Stage 2: if the combined summary still exceeds one chunk, compress it
    # with a chunked BART pass first, then cap the final pass a bit tighter
    # (100 vs 150), mirroring the original's two code paths.
    if len(large_summ.split(" ")) < chunk_len:
        final_max = 150
    else:
        large_summ = " ".join(
            _chunk_summaries(large_summ.split(" "), summarizer_bart))
        final_max = 100

    return summarizer_bart(large_summ, max_length=final_max,
                           min_length=int(min_summ_len),
                           do_sample=False)[0]['summary_text']
def extract_text(youtube_video_url, min_summ_len):
    """Fetch a YouTube video's transcript and return its summary.

    Args:
        youtube_video_url: Watch URL; the video id is taken as the text after
            the first '='.
        min_summ_len: Minimum length of the final summary, forwarded to
            summarize().

    Returns:
        The summary string produced by summarize().

    Raises:
        IndexError: if the URL contains no '=' (no video id found).
        Exceptions from youtube_transcript_api when no transcript exists.
    """
    # NOTE(review): only handles URLs where the id is the first query
    # parameter (e.g. ...watch?v=<id>); short youtu.be links and URLs with
    # extra parameters are not supported — confirm against expected inputs.
    video_id = youtube_video_url.split("=")[1]
    transcript_chunks = YouTubeTranscriptApi.get_transcript(
        video_id, languages=['hi', 'en'])
    # Join the caption fragments into one text (join beats quadratic +=).
    transcript = " ".join(chunk["text"] for chunk in transcript_chunks)
    return summarize(transcript, min_summ_len)
# Gradio UI: a URL text box and a number box (minimum summary length) are
# passed positionally to extract_text; the summary is shown as plain text.
demo = gr.Interface(
    fn=extract_text,
    inputs=["text","number"], # URL text input first, then the min-length number input
    outputs="text",
    title="YouTube Video Text Summarization for Efficient Information Capture",
    description="Generate concise summaries of your YouTube Video Text tailored to your specific needs.",
)
# debug=True surfaces tracebacks in the UI/console while developing.
demo.launch(debug=True)