File size: 3,713 Bytes
8e06b61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import pickle

from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain
from langchain.prompts import PromptTemplate
from pathlib import Path
import os
import openai
import gradio as gr 

# Load variables from a local .env file (if present) before reading the key.
load_dotenv()

# The OpenAI API key is taken from the OPENAI_KEY environment variable only.
# SECURITY: a literal key was previously kept here in a commented-out line;
# secrets must never be committed, and that key should be treated as
# compromised and revoked.
OPENAI_KEY = os.getenv('OPENAI_KEY')

# Transcript library: https://pypi.org/project/youtube-transcript-api/



# Prompt that rewrites a follow-up question as a standalone question, given
# the chat history. The leading space before "Given" is part of the prompt.
_template = "\n".join((
    " Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.",
    "Chat History:",
    "{chat_history}",
    "Follow Up Input: {question}",
    "Standalone question:",
))
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Prompt used to answer the question from the retrieved transcript chunks
# ({context}) while speaking as the youtuber called {name}.
template = "\n".join((
    "You are an AI version of the youtuber {name} .",
    "You are given the following extracted parts of a long document and a question. Provide a conversational answer.",
    "Question: {question}",
    "=========",
    "{context}",
    "=========",
    "Answer:",
))
QA_PROMPT = PromptTemplate(
    input_variables=["question", "context", "name"],
    template=template,
)

# Sample YouTube video id (not referenced by the code visible in this file).
video1 = "ReeLQR7KCcM"
# Value substituted for {name} in QA_PROMPT by generate(); empty by default.
youtuberName = ""

def generate(video_url, question):
    """Answer a question about a YouTube video using its English transcript.

    The transcript is downloaded, truncated to a fixed number of characters,
    split into chunks, embedded into a FAISS index through the OpenAI
    embeddings API and queried with a ChatVectorDBChain (empty chat history).

    Args:
        video_url: URL containing ``youtube.com/watch?v=<video id>``.
        question: Question to ask about the video.

    Returns:
        The answer produced by the chain, or a short error message string when
        the URL is not recognised or the transcript cannot be obtained.
    """
    url_marker = "youtube.com/watch?v="
    max_transcript_chars = 15000

    if not video_url or url_marker not in video_url:
        return "Неверный URL"

    # Read the value of ``v`` itself instead of the last 11 characters, so
    # URLs with trailing parameters (``&t=42s``, ``&list=...``) or a fragment
    # still resolve to the right video.
    after_marker = video_url.partition(url_marker)[2]
    video_id = after_marker.split("&", 1)[0].split("#", 1)[0].strip()
    if not video_id:
        return "Неверный URL"

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
    except Exception as exc:
        # UI boundary: report the failure as text. The exception has to be
        # formatted, not concatenated, because ``str + Exception`` raises
        # TypeError.
        return f"An error occurred: {exc}"

    # Each caption is followed by a single space, as before.
    transcript_text = "".join(item["text"] + " " for item in transcript)
    print("Transcript:", transcript_text)
    print("Transcript lenght:", len(transcript_text))
    # Slicing is a no-op for transcripts that are already short enough.
    transcript_text = transcript_text[:max_transcript_chars]

    if not transcript_text.strip():
        # Nothing to index; avoid building a vector store from no text.
        return "An error occurred: transcript is empty"

    # Transcript text -> chunks -> embeddings (OpenAI API) -> FAISS index.
    text_splitter = CharacterTextSplitter()
    chunks = text_splitter.split_text(transcript_text)

    print("regenerating search index vector store..")
    vectorStore = FAISS.from_texts(chunks, OpenAIEmbeddings(openai_api_key=OPENAI_KEY))
    # The index is rebuilt on every call and dumped to disk; this function
    # never reads the file back, so a stale index is not reused for a
    # different video.
    with open("vectorstore.pkl", "wb") as f:
        pickle.dump(vectorStore, f)

    qa = ChatVectorDBChain.from_llm(
        OpenAI(temperature=0, openai_api_key=OPENAI_KEY),
        vectorstore=vectorStore,
        qa_prompt=QA_PROMPT,
    )

    # Every request is a fresh, single-turn conversation.
    chat_history = []
    response = qa(
        {"name": youtuberName, "question": question, "chat_history": chat_history},
        return_only_outputs=True,
    )
    answer = response["answer"]
    print("Result:", answer)
    return answer

# Sample rows shown under the form; each row supplies only the video URL.
examples = [
    [f"https://www.youtube.com/watch?v={sample_id}"]
    for sample_id in ("u_P8md6brDI", "ao_OZ_bzMP8")
]

title = "YouTube Summorize (only english video < 15 min)"

# Two text inputs (video URL and question) are passed to generate(); its
# return value is shown in a single text output.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Video URL", lines=1),
        gr.Textbox(label="Question", lines=1, value="What is this video about?"),
    ],
    outputs=[gr.Textbox(label="Ответ:", lines=4)],
    title=title,
    examples=examples,
    css=".gradio-container {background-color: lightblue}",
)

# Start the web UI without a public share link and without debug mode.
demo.launch(debug=False, share=False)