"""Summarize YouTube video transcripts with a two-stage summarization pipeline.

Stage 1 splits the raw transcript into overlapping word chunks and summarizes
each with a meeting-summary model; stage 2 condenses the joined chunk
summaries with BART.  A Gradio UI exposes the whole flow.
"""
import os
from urllib.parse import parse_qs, urlparse

import pandas as pd
import numpy as np
import torch
from transformers import pipeline
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi

# Stage-1 model: tuned for long, conversational text (meeting transcripts).
summarizer_ft = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
# Stage-2 model: general-purpose abstractive summarizer.
summarizer_bart = pipeline("summarization", model="facebook/bart-large-cnn")

# Chunking parameters, measured in whitespace-delimited words (NOT model
# tokens — presumably 750 words stays under the models' token limits;
# TODO confirm against the models' max input length).
CHUNK_LEN = 750
OVERLAP = 50


def _summarize_chunks(words, model, min_length=40):
    """Summarize ``words`` in overlapping ``CHUNK_LEN``-word windows.

    Args:
        words: list of words (the text already split on spaces).
        model: a transformers summarization pipeline.
        min_length: minimum summary length passed to the model.

    Returns:
        List of per-chunk summary strings.

    The final (short) chunk caps ``max_length`` at the remaining word count
    but never below ``min_length + 1`` — the original code could pass
    ``max_length < min_length`` for short tails, which transformers rejects.
    """
    summaries = []
    pointer = 0
    n = len(words)
    while pointer < n:
        if pointer + CHUNK_LEN < n:
            txt = " ".join(words[pointer:pointer + CHUNK_LEN])
            # Overlap consecutive windows so sentences cut at a boundary
            # still appear whole in one of the two chunks.
            pointer += CHUNK_LEN - OVERLAP
            max_len = 130
        else:
            txt = " ".join(words[pointer:])
            # Clamp: max_length must stay above min_length for short tails.
            max_len = max(n - pointer, min_length + 1)
            pointer = n
        summaries.append(
            model(txt, max_length=max_len, min_length=min_length,
                  do_sample=False)[0]["summary_text"]
        )
    return summaries


def summarize(full_txt, min_summ_len=30):
    """Produce a short summary of ``full_txt``.

    Args:
        full_txt: the raw transcript text.
        min_summ_len: minimum length of the final summary (coerced to int;
            may arrive as a float from the Gradio number input).

    Returns:
        The final summary string.
    """
    min_summ_len = int(min_summ_len)
    words = full_txt.split(" ")

    # Stage 1: chunk-summarize the raw transcript.
    large_summ = " ".join(_summarize_chunks(words, summarizer_ft))

    # Stage 2: if the joined stage-1 summary still exceeds one chunk,
    # chunk-summarize it again with BART before the final pass.
    if len(large_summ.split(" ")) < CHUNK_LEN:
        final_max = 150
    else:
        large_summ = " ".join(
            _summarize_chunks(large_summ.split(" "), summarizer_bart)
        )
        final_max = 100

    # Final condensation; clamp so a large user-chosen minimum cannot
    # exceed the fixed maximum (transformers rejects min >= max).
    return summarizer_bart(
        large_summ,
        max_length=max(final_max, min_summ_len + 1),
        min_length=min_summ_len,
        do_sample=False,
    )[0]["summary_text"]


def _video_id(url):
    """Extract the YouTube video id from a watch or share URL.

    Handles ``https://www.youtube.com/watch?v=ID&t=...`` and
    ``https://youtu.be/ID``.  The original ``url.split("=")[1]`` broke on
    youtu.be links and kept trailing query parameters on watch URLs.
    """
    parsed = urlparse(url)
    if parsed.hostname and "youtu.be" in parsed.hostname:
        return parsed.path.lstrip("/")
    qs = parse_qs(parsed.query)
    if "v" in qs:
        return qs["v"][0]
    # Fall back to the original behavior for bare "...=ID" strings.
    return url.split("=")[1]


def extract_text(youtube_video_url, min_summ_len):
    """Fetch the transcript for ``youtube_video_url`` and summarize it.

    Args:
        youtube_video_url: a YouTube watch or youtu.be URL.
        min_summ_len: minimum length of the final summary.

    Returns:
        The summary string.

    Raises:
        Whatever ``YouTubeTranscriptApi.get_transcript`` raises when no
        transcript is available for the video.
    """
    video_id = _video_id(youtube_video_url)
    transcript_text = YouTubeTranscriptApi.get_transcript(
        video_id, languages=["hi", "en"]
    )
    transcript = " ".join(segment["text"] for segment in transcript_text)
    return summarize(transcript, min_summ_len)


demo = gr.Interface(
    fn=extract_text,
    inputs=["text", "number"],  # URL text box, then minimum summary length
    outputs="text",
    title="YouTube Video Text Summarization for Efficient Information Capture",
    description="Generate concise summaries of your YouTube Video Text tailored to your specific needs.",
)

if __name__ == "__main__":
    demo.launch(debug=True)