# Author: aakash0563
# Commit message: Update app.py
# Commit: 2a57676 (verified)
import pandas as pd
import numpy as np
import torch
from transformers import pipeline
import gradio as gr
import os
from youtube_transcript_api import YouTubeTranscriptApi
# First-pass model: summarize() runs it over each chunk of the raw text.
summarizer_ft = pipeline(
    task="summarization",
    model="knkarthick/MEETING_SUMMARY",
)

# Second-pass model: summarize() uses it to condense the joined chunk
# summaries into the final summary.
summarizer_bart = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
def _iter_word_chunks(words, chunk_len, overlap):
    """Yield ``(chunk, is_tail)`` windows over *words*.

    Every window except the last holds exactly ``chunk_len`` words and
    starts ``chunk_len - overlap`` words after the previous one.  The last
    window (``is_tail=True``) holds whatever remains.
    """
    step = chunk_len - overlap
    start = 0
    # Strict "<": a remainder of exactly ``chunk_len`` words is the tail.
    while start + chunk_len < len(words):
        yield words[start:start + chunk_len], False
        start += step
    yield words[start:], True


def _summarize_chunks(model, words, chunk_len, overlap,
                      max_length=130, min_length=40):
    """Summarize *words* window by window with *model*.

    Full windows are summarized with the fixed ``max_length`` /
    ``min_length`` bounds.  The tail window uses its own word count as
    ``max_length``; a tail with fewer than ``min_length`` words is kept
    verbatim, because asking for ``min_length > max_length`` is an
    infeasible generation constraint.

    Returns the list of per-window summary strings, in order.
    """
    summaries = []
    for chunk, is_tail in _iter_word_chunks(words, chunk_len, overlap):
        text = " ".join(chunk)
        if not is_tail:
            chunk_max = max_length
        elif len(chunk) < min_length:
            # Already shorter than the shortest summary we would request.
            summaries.append(text)
            continue
        else:
            chunk_max = len(chunk)
        result = model(
            text,
            max_length=chunk_max,
            min_length=min_length,
            do_sample=False,
            # A chunk is sized in words, not tokens, so it can exceed the
            # model's input limit; truncate instead of failing.
            truncation=True,
        )
        summaries.append(result[0]["summary_text"])
    return summaries


def summarize(full_txt, min_summ_len=30, *, chunk_len=750, overlap=50,
              first_pass=None, second_pass=None):
    """Summarize *full_txt* in two passes and return the final summary.

    Pass one splits the text into overlapping word windows and summarizes
    each with ``first_pass``.  Pass two condenses the joined window
    summaries with ``second_pass``; if the joined text still has
    ``chunk_len`` words or more it is windowed and summarized once more
    before the final call.

    Args:
        full_txt: Text to summarize.
        min_summ_len: Requested minimum length of the final summary
            (converted with ``int``).  It is clamped to the final call's
            ``max_length`` so the bounds stay feasible.
        chunk_len: Window size in words.
        overlap: Number of words shared by consecutive windows.
        first_pass: Callable used for pass one; defaults to the
            module-level ``summarizer_ft``.
        second_pass: Callable used for pass two; defaults to the
            module-level ``summarizer_bart``.

    Returns:
        The summary string, or ``""`` when *full_txt* contains no words.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than
            ``chunk_len`` (the window would never advance).
    """
    if not 0 <= overlap < chunk_len:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_len, "
            f"got overlap={overlap}, chunk_len={chunk_len}"
        )

    # split() (not split(" ")) so newlines and repeated spaces do not
    # produce empty or multi-word "words" that skew the window size.
    words = full_txt.split()
    if not words:
        return ""

    first_pass = summarizer_ft if first_pass is None else first_pass
    second_pass = summarizer_bart if second_pass is None else second_pass

    chunk_summaries = _summarize_chunks(first_pass, words, chunk_len, overlap)
    print(chunk_summaries)
    combined = " ".join(chunk_summaries)
    combined_words = combined.split()

    if len(combined_words) < chunk_len:
        final_max = 150
    else:
        # Still too long for one call: condense it window by window first.
        condensed = _summarize_chunks(
            second_pass, combined_words, chunk_len, overlap
        )
        for partial in condensed:
            print(partial)
        combined = " ".join(condensed)
        final_max = 100

    # Keep 0 <= min_length <= max_length whatever the caller asked for.
    final_min = max(0, min(int(min_summ_len), final_max))
    result = second_pass(
        combined,
        max_length=final_max,
        min_length=final_min,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]
def _video_id_from_url(youtube_video_url):
    """Return the video id contained in *youtube_video_url*.

    Looks, in order, for a ``v`` query parameter, a ``youtu.be/<id>`` path,
    and an ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` or ``/v/<id>``
    path.  As a last resort it falls back to the text between the first
    and second ``=`` so that any input accepted before is still accepted.

    Raises:
        ValueError: If no video id can be found.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    url = youtube_video_url.strip()
    # urlparse only recognizes the host when a scheme is present.
    parsed = urlparse(url if "://" in url else "https://" + url)

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = (parsed.hostname or "").lower()
    if host in ("youtu.be", "www.youtu.be"):
        if segments:
            return segments[0]
    elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Legacy behavior: whatever sits between the first and second "=".
    pieces = url.split("=")
    if len(pieces) > 1 and pieces[1]:
        return pieces[1]

    raise ValueError(
        f"Could not find a video id in URL: {youtube_video_url!r}"
    )


def extract_text(youtube_video_url, min_summ_len):
    """Fetch a YouTube video's transcript and return its summary.

    Args:
        youtube_video_url: URL of the video.
        min_summ_len: Minimum summary length, forwarded to ``summarize``.

    Returns:
        The summary string produced by ``summarize``.

    Raises:
        ValueError: If no video id can be found in the URL.
        Errors from the transcript request and from ``summarize``
        propagate unchanged.
    """
    video_id = _video_id_from_url(youtube_video_url)
    # Request a Hindi or English transcript.
    transcript_text = YouTubeTranscriptApi.get_transcript(
        video_id, languages=['hi', 'en']
    )
    # Each entry carries one caption snippet under the "text" key.
    transcript = " ".join(entry["text"] for entry in transcript_text)
    print(transcript)
    res = summarize(transcript, min_summ_len)
    print(res)
    return res
# Web UI around extract_text: the two inputs are passed positionally as
# (youtube_video_url, min_summ_len); the returned summary is shown as text.
demo = gr.Interface(
    title="YouTube Video Text Summarization for Efficient Information Capture",
    description="Generate concise summaries of your YouTube Video Text tailored to your specific needs.",
    fn=extract_text,
    inputs=["text", "number"],  # video URL first, then minimum summary length
    outputs="text",
)

# Start the app as soon as this module is executed.
demo.launch(debug=True)