| """ | |
| YouTube Video Analysis and Interaction Module | |
| This module provides a comprehensive set of tools for analyzing YouTube videos, | |
| extracting information, and answering questions based on video content. It leverages | |
| the LangChain library for natural language processing tasks and the YouTube Transcript | |
| API for fetching video transcripts. | |
| Classes: | |
| MainPointsExtractor: | |
| Extracts and formats main points from YouTube video transcripts. | |
| Timestamps are formatted for direct use in YouTube comments, enabling clickable | |
| links to specific video sections when pasted. | |
| SummaryExtractor: | |
| Handles the extraction and formatting of video summaries. | |
| QuestionAnswerExtractor: | |
| Processes user questions and extracts answers from video transcripts. | |
| YouTubeAgent: | |
| Manages the overall agent setup for interacting with YouTube videos and processing user queries. | |
| Key Features: | |
| - Main points summarization in multiple formats | |
| - Video content summarization | |
| - Question answering based on video content | |
| - Flexible AI agent for handling various YouTube video-related tasks | |
| """ | |
import os

import openai
from typing import List, Dict, Any, Union, Type
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.agents import tool, AgentExecutor
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser, JsonOutputFunctionsParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.memory import ConversationBufferWindowMemory
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())  # read local .env file
openai.api_key = os.environ['OPENAI_API_KEY']
# Model temperature, exposed through a getter/setter so it can be adjusted at runtime.
_temperature = 0  # default value


def get_temperature():
    """Returns the temperature used when constructing ChatOpenAI models."""
    return _temperature


def set_temperature(new_temperature):
    """Sets the temperature used by subsequently constructed ChatOpenAI models."""
    global _temperature
    _temperature = new_temperature
    # print(f"Temperature set to: {get_temperature()}")
class Points_1(BaseModel):
    """Pydantic model for representing extracted points from a YouTube transcript."""
    timestamp: float = Field(description="The timestamp (in seconds, as a floating-point number) at which the main point is discussed in the video.")
    main_point: str = Field(description="A title for the main point.")
    summary: str = Field(description="A summary of the main point discussed at that timestamp. End with a full stop.")
    emoji: str = Field(description="An emoji that matches the summary.")


class Points_2(BaseModel):
    """Pydantic model for representing extracted points."""
    main_point: str = Field(description="The main topic, theme, or subject extracted from the subtitle.")
    summary: str = Field(description="The context or a brief explanation of the main point.")
    emoji: str = Field(description="An emoji that represents or summarizes the main point.")
    timestamp: float = Field(description="The timestamp (in seconds, as a floating-point number) from the video where the main point is mentioned.")
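# For orientation only: convert_to_openai_function turns these Pydantic models into OpenAI
# function-calling schemas. Roughly (exact wording and field order may differ), the wrapper
# model MainPointsExtractor.Info_1 defined below becomes something like:
#
#   {
#       "name": "Info_1",
#       "description": "Pydantic model for representing a collection of points.",
#       "parameters": {
#           "type": "object",
#           "properties": {"points": {"type": "array", "items": {...timestamp, main_point, summary, emoji...}}},
#           "required": ["points"],
#       },
#   }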
class MainPointsExtractor:
    """
    A tool for extracting and formatting main points from YouTube video transcripts.

    This class provides methods to process transcripts and identify key points
    using natural language processing techniques.
    """

    class Info_1(BaseModel):
        """Pydantic model for representing a collection of points."""
        points: List[Points_1]

    class Info_2(BaseModel):
        """Pydantic model for representing a collection of points."""
        points: List[Points_2]
    @tool
    def get_youtube_video_main_points(youtube_video_id: str) -> str:
        """
        Extracts and formats main points (with timestamps) from a YouTube video transcript.
        Timestamps are formatted for direct use in YouTube comments, enabling clickable
        links to specific video sections when pasted.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: Formatted string of main points extracted from the video.
        """
        try:
            transcript = MainPointsExtractor._get_youtube_video_transcript(youtube_video_id)
            main_points_1 = MainPointsExtractor._extract_main_points(transcript, MainPointsExtractor.Info_1)
            main_points_2 = MainPointsExtractor._extract_main_points(transcript, MainPointsExtractor.Info_2)
            formatted_output = f"""Main points extracted from YouTube video (ID: {youtube_video_id})\nStyle_1:\n```\n{main_points_2}\n```\nStyle_2:\n```\n{main_points_1}\n```\nChoose the style that best suits your needs for presenting the main points of the video."""
            return formatted_output
        except Exception:
            raise
    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetches the transcript for a YouTube video.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The full transcript of the video, with each entry prefixed by its start time.

        Raises:
            Exception: If there's an error fetching the transcript.
        """
        try:
            transcript_json = YouTubeTranscriptApi.get_transcript(youtube_video_id)
            # Prefix every caption with its start time so timestamps survive into the LLM input.
            transcript_data = [f"{entry['start']:.2f}: {entry['text']} " for entry in transcript_json]
            return "".join(transcript_data)
        except Exception:
            raise
    @staticmethod
    def _extract_main_points(transcript: str, info_model: Union[Type[Info_1], Type[Info_2]]) -> str:
        """
        Extracts main points from the transcript using an OpenAI function-calling chain.

        Args:
            transcript (str): The full transcript of the video.
            info_model (Type[Info_1] or Type[Info_2]): The Pydantic model defining the extraction schema.

        Returns:
            str: The extracted main points, formatted as a YouTube-style comment.
        """
        main_points_extraction_function = [convert_to_openai_function(info_model)]
        model = ChatOpenAI(temperature=get_temperature())
        extraction_model = model.bind(functions=main_points_extraction_function)
        system_message = """
        You are an AI assistant that extracts info from video transcripts.
        When extracting info, ensure that:
        1. Each point has a unique timestamp.
        In addition to these specific requirements, you have the authority to make other improvements as you see fit. This may include:
        - Refining the summaries for clarity and conciseness
        - Adjusting emoji choices to better represent the content
        - Reorganizing points for better logical flow
        - Removing redundant information
        - Adding context where necessary
        Your goal is to produce a refined and accurate representation of the main points from the video transcript. Use your judgment to balance adherence to the specific rules with overall improvement of the extracted information.
        """
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", "{input}")
        ])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="points")
        # Each caption is prefixed with its numeric start time, so splitting on " <digit>"
        # tends to break chunks at caption boundaries.
        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=8192, separators=[f" {char}" for char in "123456789"])
        prep = RunnableLambda(lambda x: [{"input": doc} for doc in text_splitter.split_text(x)])
        chain = prep | extraction_chain.map() | MainPointsExtractor._flatten | MainPointsExtractor._format_youtube_comment
        return chain.invoke(transcript)
    @staticmethod
    def _flatten(matrix):
        """Flattens a 2D list into a 1D list."""
        return [item for row in matrix for item in row]

    @staticmethod
    def _format_youtube_comment(json_data: List[Dict[str, Any]]) -> str:
        """
        Formats extracted main points into a YouTube-style comment.

        Args:
            json_data (List[Dict[str, Any]]): List of dictionaries containing main points.

        Returns:
            str: Formatted string representing the main points as a YouTube comment.
        """
        def _format_timestamp(seconds):
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            seconds = int(seconds % 60)
            return f"{hours:02}:{minutes:02}:{seconds:02}"

        formatted_comment = ""
        for entry in json_data:
            timestamp = _format_timestamp(entry['timestamp'])
            emoji = entry['emoji']
            summary = entry['summary']
            if entry['main_point'].endswith('.'):
                point = entry['main_point'][:-1]
            else:
                point = entry['main_point']
            formatted_comment += f"{timestamp} {emoji} {point}: {summary}\n"
        return formatted_comment.strip()
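# Illustrative only: with hypothetical extracted points, _format_youtube_comment produces lines
# that YouTube turns into clickable timestamps when pasted into a comment, e.g.
#
#   00:00:12 🚀 Introduction: The host outlines the goals of the challenge.
#   00:04:37 🗺️ Route overview: The route and its checkpoints are explained.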
#######################################################################################################################################

class Summary(BaseModel):
    """Pydantic model for representing extracted summary."""
    summary: str = Field(description="Extract detailed information from the content.")


class SummaryExtractor:
    """
    A tool for extracting and formatting summaries from YouTube video transcripts.

    This class provides methods to process transcripts and generate concise summaries
    using natural language processing techniques.
    """

    class Info(BaseModel):
        """Pydantic model for representing a collection of summaries."""
        summary: List[Summary]
    @tool
    def get_youtube_video_summary(youtube_video_id: str) -> str:
        """
        Extracts and formats a summary from a YouTube video transcript.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: Formatted string of the summary extracted from the video.
        """
        try:
            transcript = SummaryExtractor._get_youtube_video_transcript(youtube_video_id)
            summary = SummaryExtractor._extract_summary(transcript)
            return SummaryExtractor._format_summary(summary)
        except Exception as e:
            return f"Error extracting summary: {str(e)}"
    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetches the transcript for a YouTube video.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The full transcript of the video.

        Raises:
            Exception: If there's an error fetching the transcript.
        """
        try:
            transcript_json = YouTubeTranscriptApi.get_transcript(youtube_video_id)
            transcript_data = [entry['text'] for entry in transcript_json]
            return " ".join(transcript_data)
        except Exception:
            raise
    @staticmethod
    def _extract_summary(transcript: str) -> List[Dict[str, Any]]:
        """
        Extracts a summary from a YouTube video transcript.

        Args:
            transcript (str): The full transcript of the video.

        Returns:
            List[Dict[str, Any]]: A list of summary dictionaries, one or more per transcript chunk.
        """
        summary_extraction_function = [convert_to_openai_function(SummaryExtractor.Info)]
        model = ChatOpenAI(temperature=get_temperature())
        extraction_model = model.bind(functions=summary_extraction_function)
        prompt = ChatPromptTemplate.from_messages([("human", "{input}")])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="summary")
        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=8192, separators=[f" {char}" for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"])
        prep = RunnableLambda(lambda x: [{"input": doc} for doc in text_splitter.split_text(x)])
        # Reuses MainPointsExtractor._flatten to merge the per-chunk results into one list.
        chain = prep | extraction_chain.map() | MainPointsExtractor._flatten
        return chain.invoke(transcript)
    @staticmethod
    def _format_summary(summaries: List[Dict[str, Any]]) -> str:
        """
        Formats the list of summaries into a single string.

        Args:
            summaries (List[Dict[str, Any]]): List of summary dictionaries produced by _extract_summary.

        Returns:
            str: A formatted string containing all summaries.
        """
        return "\n\n".join([s["summary"] for s in summaries])
#############################################################################################################################################################

class Answer(BaseModel):
    """Pydantic model for representing an answer to a question."""
    answer: str = Field(description="The answer to the user's question based on the video transcript.")
    confidence: float = Field(description="A confidence score between 0 and 1 indicating how certain the model is about the answer.")


class QuestionAnswerExtractor:
    """
    A tool for answering questions about YouTube videos based on their transcripts.

    This class provides methods to process transcripts and generate answers to user questions
    using natural language processing techniques.
    """

    class Info(BaseModel):
        """Pydantic model for representing a collection of answers."""
        answers: List[Answer]
    @tool
    def get_answer(youtube_video_id: str, question: str) -> str:
        """
        Answers a question about a YouTube video based on its transcript.

        Args:
            youtube_video_id (str): The ID of the YouTube video.
            question (str): The user's question about the video.

        Returns:
            str: Formatted string containing the answer to the user's question.
        """
        try:
            transcript = QuestionAnswerExtractor._get_youtube_video_transcript(youtube_video_id)
            answer = QuestionAnswerExtractor._extract_answer(transcript, question)
            return QuestionAnswerExtractor._format_answer(answer)
        except Exception as e:
            return f"Error answering question: {str(e)}"
    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetches the transcript for a YouTube video.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The full transcript of the video.

        Raises:
            Exception: If there's an error fetching the transcript.
        """
        try:
            transcript_json = YouTubeTranscriptApi.get_transcript(youtube_video_id)
            transcript_data = [entry['text'] for entry in transcript_json]
            return " ".join(transcript_data)
        except Exception:
            raise
    @staticmethod
    def _extract_answer(transcript: str, question: str) -> List[Dict[str, Any]]:
        """
        Extracts answers to the user's question from the YouTube video transcript.

        Args:
            transcript (str): The full transcript of the video.
            question (str): The user's question about the video.

        Returns:
            List[Dict[str, Any]]: A list of answer dictionaries, one or more per transcript chunk.
        """
        answer_extraction_function = [convert_to_openai_function(QuestionAnswerExtractor.Info)]
        model = ChatOpenAI(temperature=get_temperature())
        # Force the model to call the "Info" function so every chunk yields a structured answer.
        extraction_model = model.bind(functions=answer_extraction_function, function_call={"name": "Info"})
        prompt = ChatPromptTemplate.from_messages([
            ("system", "You are an AI assistant tasked with answering questions about a video based on its transcript."),
            ("human", "Transcript: {transcript}\n\nQuestion: {question}\n\nProvide an answer to the question based on the transcript, along with a confidence score.")
        ])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="answers")
        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=192, chunk_size=8000, separators=[f" {char}" for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"])

        def prepare_input(x):
            # Pair every transcript chunk with the original question so each chunk is answered independently.
            chunks = text_splitter.split_text(x['transcript'])
            return [{"transcript": chunk, "question": x['question']} for chunk in chunks]

        prep = RunnableLambda(prepare_input)
        chain = prep | extraction_chain.map() | QuestionAnswerExtractor._flatten
        return chain.invoke({"transcript": transcript, "question": question})
    @staticmethod
    def _flatten(matrix):
        """Flattens a 2D list into a 1D list."""
        return [item for row in matrix for item in row]

    @staticmethod
    def _format_answer(answers: List[Dict[str, Any]]) -> str:
        """
        Formats the list of answers into a single string.

        Args:
            answers (List[Dict[str, Any]]): List of answer dictionaries produced by _extract_answer.

        Returns:
            str: A formatted string containing the best answer and its confidence score.
        """
        if not answers:
            return "I couldn't find an answer to your question based on the video transcript."
        # Pick the answer with the highest confidence score across all transcript chunks.
        best_answer = max(answers, key=lambda x: x['confidence'])
        return f"{best_answer['answer']} (confidence: {best_answer['confidence']:.2f})"
#######################################################################################################################################

class YouTubeAgent:
    """
    An agent for interacting with YouTube videos and processing user queries.

    This class sets up the necessary components for an AI agent that can understand
    and respond to user queries about YouTube videos.
    """

    def __init__(self):
        """Initializes the YouTubeAgent with the necessary tools and components."""
        self.tools = [
            MainPointsExtractor.get_youtube_video_main_points,
            SummaryExtractor.get_youtube_video_summary,
            QuestionAnswerExtractor.get_answer
        ]
        self.sys_message = "You are a helpful assistant."
        self.functions = [convert_to_openai_function(f) for f in self.tools]
        self.model = ChatOpenAI(temperature=get_temperature()).bind(functions=self.functions)
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", self.sys_message),
            MessagesPlaceholder(variable_name="history"),
            ("user", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad")
        ])
        self.agent_chain = RunnablePassthrough.assign(
            agent_scratchpad=lambda x: format_to_openai_functions(x["intermediate_steps"])
        ) | self.prompt | self.model | OpenAIFunctionsAgentOutputParser()
        # Keep the last few exchanges so follow-up questions can refer to earlier answers.
        self.memory = ConversationBufferWindowMemory(k=3, return_messages=True, memory_key="history")
        self.agent_executor = AgentExecutor(agent=self.agent_chain, tools=self.tools, memory=self.memory)
    def invoke(self, input_text: str) -> str:
        """
        Processes user input and returns the agent's response.

        Args:
            input_text (str): The user's input query.

        Returns:
            str: The agent's response to the user's query.
        """
        try:
            result = self.agent_executor.invoke({"input": input_text})
            return result['output']
        except Exception as e:
            return f"An error occurred: {str(e)}"
# Example usage:
# youtube_agent = YouTubeAgent()
# video_link = "https://www.youtube.com/watch?v=-OSxeoIAs2w"
# main_points = youtube_agent.invoke(f"What challenges does the race involve in the following video? {video_link}")
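# Further illustrative calls (a sketch, not part of the original example; the prompts below are
# assumptions about how the agent might be queried, reusing the video_link defined above):
#
#   summary = youtube_agent.invoke(f"Summarize the following video: {video_link}")
#   answer = youtube_agent.invoke(f"Who wins the race in this video? {video_link}")
#
#   # A higher temperature (arbitrary example value) makes the extracted points more varied:
#   set_temperature(0.7)
#   varied_points = youtube_agent.invoke(f"Give me the main points of {video_link}")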