import gradio as gr
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

# First-pass model: a summarization checkpoint fine-tuned on meeting/dialogue data.
summarizer_ft = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")

# Second-pass model: general-purpose BART-large-CNN for consolidation.
summarizer_bart = pipeline("summarization", model="facebook/bart-large-cnn")
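
# Optional: pin both pipelines to a GPU when one is available (a sketch,
# assuming torch is installed alongside transformers):
#   import torch
#   device = 0 if torch.cuda.is_available() else -1
#   summarizer_ft = pipeline("summarization", model="knkarthick/MEETING_SUMMARY", device=device)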


def summarize(full_txt, min_summ_len=30):
    # Two-pass summarization: split the text into overlapping word chunks,
    # summarize each chunk, then consolidate the stitched result.
    chunk_len = 750  # chunk size in words, a rough proxy for the models' token limits
    overlap = 50     # words shared between consecutive chunks to preserve context

    def summarize_in_chunks(words, model):
        summaries = []
        pointer = 0
        while pointer < len(words):
            if pointer + chunk_len < len(words):
                txt = " ".join(words[pointer:pointer + chunk_len])
                summaries.append(model(txt, max_length=130, min_length=40, do_sample=False)[0]['summary_text'])
                pointer += chunk_len - overlap
            else:
                txt = " ".join(words[pointer:])
                # Clamp max_length so it never falls below min_length on a short tail.
                max_len = max(len(words) - pointer, 50)
                summaries.append(model(txt, max_length=max_len, min_length=40, do_sample=False)[0]['summary_text'])
                break
        return summaries

    # First pass: chunk-level summaries of the raw text.
    large_summ = " ".join(summarize_in_chunks(full_txt.split(), summarizer_ft))

    if len(large_summ.split()) < chunk_len:
        return summarizer_bart(large_summ, max_length=150, min_length=int(min_summ_len), do_sample=False)[0]['summary_text']

    # Second pass: the stitched summary is still too long for a single call,
    # so chunk it again with BART before the final consolidation.
    large_summ = " ".join(summarize_in_chunks(large_summ.split(), summarizer_bart))
    return summarizer_bart(large_summ, max_length=100, min_length=int(min_summ_len), do_sample=False)[0]['summary_text']
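
# Standalone usage sketch (hypothetical file name, for illustration):
#   with open("transcript.txt") as f:
#       print(summarize(f.read(), min_summ_len=30))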


def extract_text(youtube_video_url, min_summ_len):
    # Naive id extraction: assumes a standard watch URL of the form
    # https://www.youtube.com/watch?v=<id> with no further query parameters
    # (a more robust sketch follows the function).
    video_id = youtube_video_url.split("=")[1]
    # Prefer a Hindi transcript, falling back to English.
    transcript_text = YouTubeTranscriptApi.get_transcript(video_id, languages=['hi', 'en'])
    transcript = " ".join(entry["text"] for entry in transcript_text)
    return summarize(transcript, min_summ_len)
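
# A more robust video-id parser (a sketch; parse_video_id is not part of the
# original app) that tolerates extra query parameters such as "&t=30s" and the
# youtu.be short form; it could replace the split("=") line above.
from urllib.parse import urlparse, parse_qs

def parse_video_id(url):
    parsed = urlparse(url)
    if parsed.hostname and parsed.hostname.endswith("youtu.be"):
        return parsed.path.lstrip("/")  # https://youtu.be/<id>
    return parse_qs(parsed.query).get("v", [""])[0]  # .../watch?v=<id>&...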


demo = gr.Interface(
    fn=extract_text,
    inputs=[
        gr.Textbox(label="YouTube video URL"),
        gr.Number(label="Minimum summary length", value=30),
    ],
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Video Text Summarization for Efficient Information Capture",
    description="Generate concise summaries of a YouTube video's transcript, tailored to your needs.",
)

demo.launch(debug=True)
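
# Note: when running in a hosted notebook (e.g. Colab), Gradio can expose a
# public link via demo.launch(debug=True, share=True).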