implementation
- __init__.py +0 -0
- app.py +122 -0
- audio_to_text.py +39 -0
- requirements.txt +7 -0
- text_summary.py +201 -0
- youtube_extraction.py +31 -0
__init__.py
ADDED
File without changes
app.py
ADDED
@@ -0,0 +1,122 @@
import tempfile
import time
from functools import wraps
from shutil import rmtree

import streamlit as st

from audio_to_text import transcribe_audio
from text_summary import (align_chapters, get_automatic_chapters,
                          summarize_chapters)
from youtube_extraction import get_youtube_chapters, youtube_to_audio


def timing_decorator(message):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            with st.spinner(message):
                start_time = time.time()
                result = func(*args, **kwargs)
                end_time = time.time()
                st.write(f"{message} complete - {end_time - start_time:.2f}s")
                return result

        return wrapper

    return decorator


@timing_decorator("Downloading YouTube video")
def download_youtube(youtube_url, work_dir):
    audio_fpath = youtube_to_audio(youtube_url, work_dir)
    # Get YouTube chapters; an empty list is returned if none are in the metadata
    yt_chapters = get_youtube_chapters(youtube_url)
    return audio_fpath, yt_chapters


@timing_decorator("Transcribing audio")
def audio_to_text(audio_fpath):
    # Transcribe the audio track with Whisper
    timestamped_text = transcribe_audio(audio_fpath)
    return timestamped_text


@timing_decorator("Retrieving chapters")
def retrieve_chapters(timestamped_text, yt_chapters):
    # Use the uploader's chapters when available, otherwise infer them
    if len(yt_chapters) == 0:
        chapters = get_automatic_chapters(timestamped_text)
    else:
        chapters = align_chapters(timestamped_text, yt_chapters)
    return chapters


@timing_decorator("Summarizing video")
def summarize_youtube_chapters(chapters):
    # Returns (summarized_chapters, overall_summary)
    return summarize_chapters(chapters)


def get_work_dir():
    # mkdtemp keeps the directory alive until rmtree() is called later;
    # tempfile.TemporaryDirectory().name would be removed as soon as the
    # object is garbage-collected
    return tempfile.mkdtemp()


def convert_seconds(seconds):
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = int(seconds % 60)

    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def summarize_video(youtube_url):
    st.video(youtube_url)
    # Create a temporary directory to store the audio file
    work_dir = get_work_dir()

    # Summarize the video
    audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
    timestamped_text = audio_to_text(audio_fpath)

    chapters = retrieve_chapters(timestamped_text, yt_chapters)
    summarized_chapters, overall_summary = summarize_youtube_chapters(chapters)

    st.write(f"**TLDR:** {overall_summary}")

    for summarized_chapter in summarized_chapters:
        start_time = convert_seconds(summarized_chapter["start"])
        end_time = convert_seconds(summarized_chapter["end"])

        timestamp = f"{start_time} - {end_time}"
        title = summarized_chapter["title"]
        summary = summarized_chapter["summary"]

        # Display the hyperlink with timestamp and title; the t= offset
        # is cast to int since chapter starts are floats from Whisper
        hyperlink = (
            f"[{timestamp} - {title}]({youtube_url}&t={int(summarized_chapter['start'])}s)"
        )
        st.markdown(hyperlink, unsafe_allow_html=True)

        st.write(summary)
    rmtree(work_dir)


def app():
    st.title("Video Summarizer")
    youtube_url = st.text_input("Enter a YouTube URL")

    # Add summarize button
    summarize_button = st.button("Summarize")

    if summarize_button and youtube_url:
        summarize_video(youtube_url)


if __name__ == "__main__":
    app()
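
For reference, a quick worked example of the HH:MM:SS formatting and the chapter-link pattern above (a sketch, not part of the commit; importing app pulls in text_summary, which requires CHATGPT_API_KEY to be set):

from app import convert_seconds

# 3725 seconds = 1 hour, 2 minutes, 5 seconds
assert convert_seconds(3725) == "01:02:05"

# Chapter links append the raw start offset to the watch URL, e.g.
# [01:02:05 - Intro](https://www.youtube.com/watch?v=VIDEO_ID&t=3725s)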
audio_to_text.py
ADDED
@@ -0,0 +1,39 @@
import whisper

EOS_TOKENS = [".", "!", "?"]


def transcribe_audio(audio_fpath, max_snt_len=100):
    model = whisper.load_model("small")
    result = model.transcribe(audio_fpath)

    # Regroup Whisper segments into sentence-like units with timestamps
    sentences = []
    snt_start = None
    snt = ""
    for segment in result["segments"]:
        snt += f'{segment["text"]} '
        if snt_start is None:  # "is None" so a 0.0 start time is not reset
            snt_start = segment["start"]
        stripped = segment["text"].strip()
        # Close the sentence at end-of-sentence punctuation or when it grows too long
        if (stripped and stripped[-1] in EOS_TOKENS) or len(snt) > max_snt_len:
            sentences.append(
                {"text": snt.strip(), "start": snt_start, "end": segment["end"]}
            )
            snt_start = None
            snt = ""

    # Flush any trailing text that never hit an end-of-sentence token
    if len(snt) > 0:
        sentences.append(
            {"text": snt.strip(), "start": snt_start, "end": segment["end"]}
        )

    # Emit one "start end text" line per sentence
    timestamped_text = ""
    for sentence in sentences:
        timestamped_text += (
            f'{sentence["start"]} {sentence["end"]} {sentence["text"]}\n'
        )
    return timestamped_text
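
Each transcript line is whitespace-delimited as "start end text"; this is the format that align_chapters and get_chunk_timestamps in text_summary.py later parse with split(). A self-contained sketch with invented timings:

# Hypothetical transcribe_audio output (timings invented for illustration)
timestamped_text = (
    "0.0 4.2 Hello everyone and welcome back to the channel.\n"
    "4.2 9.8 Today we are going to build a video summarizer.\n"
)

for line in timestamped_text.strip().split("\n"):
    fields = line.split()
    start, end = float(fields[0]), float(fields[1])
    text = " ".join(fields[2:])
    print(f"{start:>6.1f} {end:>6.1f}  {text}")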
requirements.txt
ADDED
@@ -0,0 +1,7 @@
git+https://github.com/openai/whisper.git
openai
yt-dlp
streamlit
scikit-learn
tenacity
langchain
text_summary.py
ADDED
@@ -0,0 +1,201 @@
import os

import numpy as np
import openai
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from sklearn.cluster import KMeans
from tenacity import (retry, stop_after_attempt,  # retries with exponential backoff
                      wait_random_exponential)

DEFAULT_PROMPT = (
    "Summarize this Youtube video chapter. Always start with a topical sentence: "
)
CHAPTER_TITLE = "Give a title to this video chapter based on the transcript: "

title_template = "Give a title to this text summary: {text}"
TITLE_PROMPT = PromptTemplate(template=title_template, input_variables=["text"])

openai.api_key = os.environ.get("CHATGPT_API_KEY")

if openai.api_key is None:
    raise ValueError("CHATGPT_API_KEY environment variable not set")


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_embeddings(text_chunks, model="text-embedding-ada-002"):
    data = openai.Embedding.create(input=text_chunks, model=model)["data"]
    embeddings = [item["embedding"] for item in data]
    return np.array(embeddings)


def text_from_file(text_path):
    with open(text_path, "r", encoding="utf-8") as text_file:
        return text_file.read()


def get_chunks(timestamped_transcripts, chunk_lines):
    # Group the transcript lines into chunks of chunk_lines lines each
    chunks = []
    current_chunk = []
    for line in timestamped_transcripts:
        current_chunk.append(line)
        if len(current_chunk) == chunk_lines:
            chunks.append("\n".join(current_chunk))
            current_chunk = []

    if len(current_chunk) > 0:
        chunks.append("\n".join(current_chunk))

    return chunks


def align_chapters(timestamped_transcript, yt_chapters):
    # Assign each transcript line to the YouTube chapter whose time span contains it
    timestamped_transcripts = timestamped_transcript.strip().split("\n")

    chapters = []
    chapter_text = ""
    chapter_start_time = 0.0
    prev_end_time = 0.0
    chapter_index = 0
    for idx, trn in enumerate(timestamped_transcripts):
        trn_start_time = float(trn.split()[0])
        trn_end_time = float(trn.split()[1])
        trn_text = " ".join(trn.split()[2:])

        if idx == 0:
            chapter_start_time = trn_start_time

        next_index = min(chapter_index + 1, len(yt_chapters) - 1)
        if trn_start_time >= yt_chapters[next_index]["start_time"]:
            if len(chapters) == len(yt_chapters):
                # Already past the last chapter boundary; keep accumulating
                chapter_text += f"{trn_text}\n"
            else:
                chapters.append(
                    {
                        "text": chapter_text,
                        "start_time": chapter_start_time,
                        "end_time": prev_end_time,
                        "title": yt_chapters[chapter_index]["title"],
                    }
                )
                chapter_text = trn_text
                chapter_start_time = trn_start_time
                chapter_index += 1
        else:
            chapter_text += f"{trn_text}\n"
        prev_end_time = trn_end_time

    # Merge any remaining text into the final chapter
    if len(chapters) == len(yt_chapters):
        chapter_index = len(yt_chapters) - 1
        chapters[chapter_index]["text"] += chapter_text
        chapters[chapter_index]["end_time"] = prev_end_time
    return chapters


def get_automatic_chapters(timestamped_transcript, chunk_lines=5, num_clusters=3):
    # strip() avoids an empty trailing line from the final "\n"
    timestamped_transcripts = timestamped_transcript.strip().split("\n")

    # Split into chunks and embed them
    text_chunks = get_chunks(timestamped_transcripts, chunk_lines)
    embeddings = get_embeddings(text_chunks)

    # Create and fit the K-means model
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)

    # Get the cluster labels
    cluster_labels = kmeans.labels_

    # Start a new chapter whenever consecutive chunks fall in different clusters
    current_label = -1
    current_text = ""
    chapters = []
    chapter_start_time = 0.0
    prev_end_time = 0.0
    for idx, (text_chunk, label) in enumerate(zip(text_chunks, cluster_labels)):
        start_time, end_time = get_chunk_timestamps(text_chunk)

        if idx == 0:
            chapter_start_time = start_time

        if label != current_label and current_label != -1:
            chapters.append(
                {
                    "text": current_text,
                    "start_time": chapter_start_time,
                    "end_time": prev_end_time,
                    "title": "",
                }
            )
            current_text = ""
            chapter_start_time = start_time

        current_label = label
        current_text += get_chunk_text(text_chunk)
        prev_end_time = end_time
    if len(current_text) > 0:
        chapters.append(
            {
                "text": current_text,
                "start_time": chapter_start_time,
                "end_time": prev_end_time,
                "title": "",
            }
        )
    return chapters


def get_chunk_timestamps(chunk):
    # The first line's start time and the last line's end time bound the chunk
    start_time = float(chunk.strip().split("\n")[0].split()[0])
    end_time = float(chunk.strip().split("\n")[-1].split()[1])
    return start_time, end_time


def get_chunk_text(chunk):
    # Drop the two leading timestamps from each line; the trailing space
    # keeps words from fusing across consecutive lines
    chunk_text = ""
    for chunk_line in chunk.strip().split("\n"):
        chunk_text += " ".join(chunk_line.split()[2:]) + " "
    return chunk_text


def summarize_chapters(chapters):
    llm = OpenAI(temperature=0.9, openai_api_key=os.environ.get("CHATGPT_API_KEY"))
    chapter_docs = [Document(page_content=chapter["text"]) for chapter in chapters]

    # Map-reduce gives per-chapter summaries (intermediate steps)
    # plus an overall summary (output text)
    summary_chain = load_summarize_chain(
        llm, chain_type="map_reduce", return_intermediate_steps=True
    )
    summaries = summary_chain(
        {"input_documents": chapter_docs}, return_only_outputs=True
    )

    summary_docs = [
        Document(page_content=summary) for summary in summaries["intermediate_steps"]
    ]

    # Generate a title for each chapter summary
    title_chain = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=TITLE_PROMPT,
    )
    titles = title_chain({"input_documents": summary_docs}, return_only_outputs=True)

    summarized_chapters = []
    for chapter, chapter_summary, chapter_title in zip(
        chapters, summaries["intermediate_steps"], titles["intermediate_steps"]
    ):
        # Prefer the original YouTube chapter title when one exists
        if len(chapter["title"]) > 0:
            chapter_title = chapter["title"]
        summarized_chapters.append(
            {
                "start": chapter["start_time"],
                "end": chapter["end_time"],
                "title": chapter_title.strip(),
                "summary": chapter_summary.strip(),
            }
        )
    return summarized_chapters, summaries["output_text"]
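
A small sketch of the chunking step that feeds the embeddings (the lines are invented; note that importing text_summary raises at import time unless CHATGPT_API_KEY is set):

from text_summary import get_chunks

lines = [f"{i}.0 {i + 1}.0 sentence number {i}" for i in range(7)]
chunks = get_chunks(lines, chunk_lines=5)

assert len(chunks) == 2            # one full chunk of 5 lines, one leftover of 2
assert chunks[0].count("\n") == 4  # 5 lines joined by newlines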
youtube_extraction.py
ADDED
@@ -0,0 +1,31 @@
import glob

import yt_dlp


def youtube_to_audio(url, output_path, filename_template="youtube_video"):
    ydl_opts = {
        "outtmpl": f"{output_path}/{filename_template}",
        "format": "m4a/bestaudio/best",
        "postprocessors": [
            {  # Extract audio using ffmpeg
                "key": "FFmpegExtractAudio",
                "preferredcodec": "m4a",
            }
        ],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    # yt-dlp appends the final extension, so glob for the downloaded file
    file_path = glob.glob(f"{output_path}/{filename_template}*")[0]
    return file_path


def get_youtube_chapters(url):
    video_chapters = []
    ydl_opts = {}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)
        if "chapters" in info and info["chapters"]:
            video_chapters = info["chapters"]

    return video_chapters
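
A minimal usage sketch for this module (the URL is a placeholder; the m4a postprocessor assumes ffmpeg is available on PATH):

import tempfile

from youtube_extraction import get_youtube_chapters, youtube_to_audio

url = "https://www.youtube.com/watch?v=VIDEO_ID"  # placeholder, not a real video
with tempfile.TemporaryDirectory() as work_dir:
    audio_fpath = youtube_to_audio(url, work_dir)
    chapters = get_youtube_chapters(url)  # [] when the uploader set no chapters
    print(audio_fpath, len(chapters))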