LaoCzi committed on
Commit
8e06b61
·
1 Parent(s): 276f54a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+
3
+ from youtube_transcript_api import YouTubeTranscriptApi
4
+ from dotenv import load_dotenv
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.vectorstores.faiss import FAISS
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+ from langchain.llms import OpenAI
9
+ from langchain.chains import ChatVectorDBChain
10
+ from langchain.prompts import PromptTemplate
11
+ from pathlib import Path
12
+ import os
13
+ import openai
14
+ import gradio as gr
15
+
16
# Load variables from a local .env file (if present) into the environment.
load_dotenv()
# The OpenAI API key is read from the environment only.
# SECURITY: never commit a literal key to source control, not even inside a
# comment. Any key that was ever committed must be treated as leaked and
# rotated in the OpenAI dashboard.
OPENAI_KEY = os.getenv('OPENAI_KEY')
# Transcript library documentation:
# https://pypi.org/project/youtube-transcript-api/
20
+
21
+
22
+
23
# Prompt that asks the model to rewrite a follow-up question as a standalone
# question, given the prior conversation.
# Placeholders: {chat_history} and {question}.
# NOTE(review): no visible code in this file passes CONDENSE_QUESTION_PROMPT
# to a chain - confirm whether it is meant to be wired in or can be dropped.
_template = """ Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
29
+
30
# Answering prompt: frames the model as an AI version of a named YouTuber and
# asks for a conversational answer to {question} based on the extracted
# document parts supplied in {context}.
# Placeholders: {name}, {question}, {context} - these must stay in sync with
# the input_variables list below.
template = """You are an AI version of the youtuber {name} .
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
Question: {question}
=========
{context}
=========
Answer:"""
QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context", "name"])
38
+
39
# Sample video id; not referenced by any other visible code in this file.
video1 = "ReeLQR7KCcM"
# Value substituted for the {name} placeholder of QA_PROMPT when generate()
# runs the chain; currently empty.
youtuberName = ""
41
+
42
# Transcripts longer than this many characters are truncated before being
# embedded (presumably to bound the per-request embedding cost).
_MAX_TRANSCRIPT_CHARS = 15000
# Substring identifying a standard YouTube watch URL; the video id follows it.
_WATCH_URL_MARKER = "youtube.com/watch?v="


def _extract_video_id(video_url):
    """Return the video id from a YouTube watch URL, or None if not found."""
    _, marker, tail = (video_url or "").strip().partition(_WATCH_URL_MARKER)
    if not marker:
        return None
    # Cut off any further query parameters (&t=..., &list=...) or a fragment,
    # so the id is correct even when it is not the last thing in the URL.
    for separator in ("&", "#"):
        tail = tail.partition(separator)[0]
    return tail or None


def _fetch_transcript_text(video_id):
    """Fetch the English transcript and flatten it into one string."""
    segments = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
    # Each segment's text is followed by a single space, as before.
    return "".join(f"{segment['text']} " for segment in segments)


def generate(video_url, question):
    """Answer a question about a YouTube video using its English transcript.

    The transcript is fetched, truncated to ``_MAX_TRANSCRIPT_CHARS``
    characters, split into chunks, embedded into a FAISS vector store and
    queried through a ``ChatVectorDBChain`` with ``QA_PROMPT``.

    Args:
        video_url: A watch URL containing ``youtube.com/watch?v=<id>``.
        question: The question to ask about the video.

    Returns:
        The chain's answer string, or a short error string: ``"Неверный URL"``
        when the URL is not a recognised watch URL, and
        ``"An error occurred: ..."`` when the transcript cannot be fetched or
        is empty.
    """
    video_id = _extract_video_id(video_url)
    if video_id is None:
        return "Неверный URL"

    try:
        transcript = _fetch_transcript_text(video_id)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the
        # handler. (Concatenating str + exception raised TypeError before.)
        return f"An error occurred: {e}"

    print("Transcript:", transcript)
    print("Transcript lenght:", len(transcript))
    if not transcript.strip():
        # Nothing to embed; building an index from no text would fail.
        return "An error occurred: the transcript is empty"
    transcript = transcript[:_MAX_TRANSCRIPT_CHARS]

    chunks = CharacterTextSplitter().split_text(transcript)

    print("regenerating search index vector store..")
    # Embeddings are computed through the OpenAI API.
    vector_store = FAISS.from_texts(
        chunks, OpenAIEmbeddings(openai_api_key=OPENAI_KEY)
    )
    # NOTE(review): this file is rewritten on every request under one fixed
    # name and nothing visible reads it back, so it is not a usable cache
    # (it would hold another video's index, and concurrent requests race on
    # it). Kept only to preserve the existing on-disk artifact. If loading is
    # ever reinstated, remember that unpickling untrusted data is unsafe.
    with open("vectorstore.pkl", "wb") as f:
        pickle.dump(vector_store, f)

    qa = ChatVectorDBChain.from_llm(
        OpenAI(temperature=0, openai_api_key=OPENAI_KEY),
        vectorstore=vector_store,
        qa_prompt=QA_PROMPT,
    )

    # Every call is a fresh, single-turn conversation: no history is kept.
    response = qa(
        {"name": youtuberName, "question": question, "chat_history": []},
        return_only_outputs=True,
    )
    answer = response["answer"]
    print("Result:", answer)
    return answer
87
+
88
# Example rows passed to the interface; each row supplies only a video URL.
examples = [
    ['https://www.youtube.com/watch?v=u_P8md6brDI'],
    ['https://www.youtube.com/watch?v=ao_OZ_bzMP8'],
]

title = "YouTube Summorize (only english video < 15 min)"

# Input components, in the order of generate()'s parameters:
# the video URL first, then the question (pre-filled with a default).
_input_boxes = [
    gr.Textbox(lines=1, label="Video URL"),
    gr.Textbox(lines=1, label="Question", value="What is this video about?"),
]
# Single output component that receives the string returned by generate().
_output_boxes = [gr.Textbox(lines=4, label="Ответ:")]

demo = gr.Interface(
    fn=generate,
    inputs=_input_boxes,
    outputs=_output_boxes,
    title=title,
    examples=examples,
    css=".gradio-container {background-color: lightblue}",
)
# Start the web app without a public share link and without debug mode.
demo.launch(share=False, debug=False)