Spaces:
Sleeping
Sleeping
File size: 4,801 Bytes
073e2b4 fbee908 073e2b4 fbee908 151f648 fbee908 d9e3ffc fbee908 151f648 fbee908 8cb2a5a 151f648 3d331ca 151f648 3d331ca 151f648 515a8de d7aeb92 515a8de d7aeb92 515a8de d7aeb92 515a8de 5964686 515a8de d7aeb92 515a8de d7aeb92 515a8de c63913d 515a8de d7aeb92 515a8de d7aeb92 515a8de d7aeb92 f60d1cb 515a8de f60d1cb d7aeb92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# Bootstrap: install runtime dependencies at startup (Hugging Face Spaces
# pattern — dependencies are installed from the script itself).
import subprocess
import sys

# Invoke pip via the *current* interpreter so packages land in the environment
# this script actually runs in — a bare "pip" on PATH may belong to a
# different Python installation.
_PIP_INSTALL = [sys.executable, "-m", "pip", "install"]
subprocess.check_call([*_PIP_INSTALL, "transformers==4.35.2"])
subprocess.check_call([*_PIP_INSTALL, "torch>=1.7.1"])
subprocess.check_call([*_PIP_INSTALL, "huggingface_hub>=0.19.0"])
subprocess.check_call([*_PIP_INSTALL, "tokenizers>=0.15.0"])
subprocess.check_call([*_PIP_INSTALL, "pytube"])
# NOTE: the PyPI "pathlib" package is an obsolete Python-2 backport that can
# shadow the stdlib module on Python 3; pathlib ships with Python, so it is
# deliberately NOT pip-installed here (the original did, which is a bug).
subprocess.check_call([*_PIP_INSTALL, "youtube_transcript_api>=0.6.3"])
import transformers
import torch
import os
import pathlib
from huggingface_hub import login
import pytube
def install_missing_packages():
    """Install any required runtime package that is not currently importable.

    Each package is probed with ``__import__``; on ImportError it is installed
    (with its version constraint, when one is given) via this interpreter's
    pip, so the install targets the running environment.
    """
    import sys  # local import keeps the bootstrap helper self-contained

    # Package name -> pip version specifier (None = unpinned).
    # NOTE(review): torch is constrained ">=1.11.0" here but ">=1.7.1" in the
    # startup installs above — confirm which constraint is intended.
    required_packages = {
        "torch": ">=1.11.0",
        "transformers": ">=4.35.2",
        "pytube": None,
        "huggingface_hub": ">=0.19.0",
    }
    for package, version in required_packages.items():
        try:
            __import__(package)
        except ImportError:
            package_name = f"{package}{version}" if version else package
            # "python -m pip" instead of bare "pip": avoids installing into a
            # different Python installation that happens to be first on PATH.
            subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
# Run the dependency check defined above before any Hub access.
install_missing_packages()
# Authenticate with the Hugging Face Hub. The token must be supplied via the
# HF_TOKEN environment variable (e.g. a Space secret); startup fails loudly
# when it is absent rather than continuing unauthenticated.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)
else:
    raise ValueError("HF_TOKEN environment variable not set.")
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# import gradio as gr
# # Load the model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
# model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
# # Define a function for summarization
# def summarize_youtube_content(input_text):
# # Use the pipeline for summarization
# summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
# summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
# return summary[0]['generated_text']
# # Create a Gradio interface
# interface = gr.Interface(
# fn=summarize_youtube_content,
# inputs=gr.Textbox(lines=10, placeholder="Paste YouTube transcript here..."),
# outputs=gr.Textbox(lines=5, label="Summarized Content"),
# title="YouTube Content Summarizer",
# description="Paste the transcript of a YouTube video to generate a concise summary.",
# )
# # Launch the Gradio app
# if __name__ == "__main__":
# interface.launch()
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Supports short links (``youtu.be/<id>``), standard watch URLs
    (``youtube.com/watch?v=<id>``, including the mobile host), and
    ``/embed/<id>`` / ``/shorts/<id>`` paths.

    Returns the ID string, or None when no ID can be found. (The original
    raised KeyError on a watch URL without a ``v`` query parameter.)
    """
    parsed_url = urlparse(url)
    if parsed_url.hostname == 'youtu.be':
        # Short-link form: the ID is the path, minus the leading slash.
        return parsed_url.path[1:] or None
    if parsed_url.hostname in ('www.youtube.com', 'youtube.com', 'm.youtube.com'):
        if parsed_url.path == '/watch':
            # Guard against a missing "v" query parameter.
            values = parse_qs(parsed_url.query).get('v')
            return values[0] if values else None
        # /embed/<id> and /shorts/<id> carry the ID as the next path segment.
        for prefix in ('/embed/', '/shorts/'):
            if parsed_url.path.startswith(prefix):
                return parsed_url.path[len(prefix):].split('/')[0] or None
    return None
def get_transcript(video_id):
    """Fetch a video's transcript and flatten it into a single string.

    Failures are not raised: any exception is reported back to the caller as
    a string beginning with "Error getting transcript:".
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(segment['text'] for segment in segments)
    except Exception as e:
        return f"Error getting transcript: {str(e)}"
def _get_summarizer():
    """Lazily build and cache the BART summarization pipeline.

    The original code re-downloaded/re-loaded the model and tokenizer on
    EVERY request; caching them on the function object loads them once per
    process.
    """
    if not hasattr(_get_summarizer, "_pipeline"):
        tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
        model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
        _get_summarizer._pipeline = pipeline("summarization", model=model, tokenizer=tokenizer)
    return _get_summarizer._pipeline


def summarize_youtube_video(video_url):
    """Summarize the spoken content of a YouTube video.

    Resolves the URL to a video ID, fetches the transcript, and runs it
    through the cached summarization pipeline. All failures are returned as
    human-readable strings rather than raised (the Gradio UI displays them).
    """
    try:
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"
        transcript = get_transcript(video_id)
        # get_transcript reports failures as "Error ..." strings, not exceptions.
        if transcript.startswith("Error"):
            return transcript
        summarizer = _get_summarizer()
        summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False)
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Create Gradio interface
# Single-textbox UI: a YouTube URL in, a plain-text summary (or a
# human-readable error string from summarize_youtube_video) out.
interface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=gr.Textbox(
        lines=1,
        placeholder="Enter YouTube video URL here..."
    ),
    outputs=gr.Textbox(
        lines=5,
        label="Video Summary"
    ),
    title="YouTube Video Summarizer",
    description="Enter a YouTube video URL to generate a concise summary of its content.",
)
# Launch the interface
# Guarded so importing this module does not start the web server.
if __name__ == "__main__":
    interface.launch()
##########################
|