Spaces:

aakash0563
/

YouTube-Video-Text-Summarization

Sleeping

App Files Files Community

aakash0563 committed on Feb 3, 2024

Commit

9d18fbb

verified ·

1 Parent(s): 043f83f

Create app.py

Browse files

Files changed (1) hide show

app.py +79 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import pandas as pd
+import numpy as np
+import torch
+from transformers import pipeline
+import gradio as gr
+import os
+from youtube_transcript_api import YouTubeTranscriptApi
+summarizer_bart = pipeline("summarization", model="facebook/bart-large-cnn")  # built once at import time; summarize() reuses this pipeline for every call
+def summarize(full_txt, min_summ_len=30):
+    l = full_txt.split(" ")
+    l_summ = []
+    chunk_len = 750
+    overlap = 50
+    pointer = 0
+    flag = True
+    while(flag):
+        if pointer < len(l):
+            if pointer + chunk_len < len(l):
+                txt = " ".join(l[pointer:pointer+chunk_len])
+                pointer = pointer + chunk_len - overlap
+                l_summ.append(summarizer_ft(txt, max_length=130, min_length=40, do_sample=False)[0]['summary_text'])
+            else:
+                txt = " ".join(l[pointer:])
+                l_summ.append(summarizer_ft(txt, max_length=len(l) - pointer, min_length=40, do_sample=False)[0]['summary_text'])
+                pointer = len(l)
+                flag = False
+    large_summ = " ".join(l_summ)
+    print(l_summ)
+    l_large_summ = large_summ.split(" ")
+    if len(large_summ.split(" ")) < chunk_len:
+        summ = summarizer_bart(large_summ, max_length=150, min_length=int(min_summ_len), do_sample=False)[0]['summary_text']
+    else:
+        flag = True
+        pointer = 0
+        final_summ = []
+        while(flag):
+            if pointer < len(l_large_summ):
+                if pointer + chunk_len < len(l_large_summ):
+                    txt = " ".join(l_large_summ[pointer:pointer+chunk_len])
+                    pointer = pointer + chunk_len - overlap
+                    t = summarizer_bart(txt, max_length=130, min_length=40, do_sample=False)[0]['summary_text']
+                    print(t)
+                    final_summ.append(t)
+                else:
+                    txt = " ".join(l_large_summ[pointer:])
+                    t = summarizer_bart(txt, max_length=len(l_large_summ)-pointer, min_length=40, do_sample=False)[0]['summary_text']
+                    final_summ.append(t)
+                    print(t)
+                    pointer = len(l_large_summ)
+                    flag = False
+        large_summ = " ".join(final_summ)
+        summ = summarizer_bart(large_summ, max_length=100, min_length=int(min_summ_len), do_sample=False)[0]['summary_text']
+    return summ
+def extract_text(youtube_video_url,min_summ_len):
+    try:
+        video_id = youtube_video_url.split("=")[1]
+        transcript_text = YouTubeTranscriptApi.get_transcript(video_id)
+        transcript = ""
+        for i in transcript_text:
+            transcript += " " + i["text"]
+        res = summarize(transcript,min_summ_len)
+        return res
+    except Exception as e:
+        raise e
+demo = gr.Interface(
+    fn=extract_text,
+    inputs=["text","number"],  # text input for the video URL, then a number for min_summ_len (same order as extract_text's parameters)
+    outputs="text",
+    title="YouTube Video Text Summarization for Efficient Information Capture",
+    description="Generate concise summaries of your YouTube Video Text tailored to your specific needs.",
+)
+demo.launch(debug=True)  # runs at module level, so the app starts as soon as this file is executed