MohamedAlgebali commited on
Commit
2ca28b3
·
1 Parent(s): 573a59d

initial commit

Browse files
Files changed (7) hide show
  1. app.py +291 -0
  2. gpt3.py +66 -0
  3. gpt4.py +62 -0
  4. requirements.txt +9 -0
  5. utils.py +235 -0
  6. vqueri.jpeg +0 -0
  7. whisper_result.py +143 -0
app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gpt4
2
+ import gpt3
3
+ from time import sleep
4
+ from asyncio import run
5
+ from langchain.prompts import PromptTemplate
6
+ from utils import *
7
+ import streamlit as st
8
+ from pathlib import Path
9
+ from streamlit_option_menu import option_menu
10
+
11
# Prompt sent once per transcript chunk: {i} is the 1-based chunk number,
# {input} the chunk text, {question} the user's query.
# NOTE(review): the wording below (including its spelling) is runtime model
# input -- do not "fix" it cosmetically without re-validating model output.
question_prompt_template = """
You are very good at handling very long texts,so I will give you a video transcription splitted in small pieces,this is piece number {i}.You will get a query about it,\n\n
transcription: {input}\n\n

query: {question} \n\n
feel free to neglect the given transcription if you see that the query is not related to it like thank you or ok and similars, provide instead an appropriate answer like you are welcome.
query may be a question about it or not, do your best to extract the answer if it exists or make up a suitable answer but hint me if you made one(say for example This answer is not mentioned but and this is a made one).
or it can be explaining something in a simpler way,
or it can be writing programming code explaining a concept in it,
or summerizing it in number of words,
or splitting it to chapters of homogenious content like youtube does.Do your best to give me the answer in this format "hr:min:sec title" and make sure that each chapter is at least 3 minutes.
or any query
you may be asked to provide your answer in specific language like arabic, and you must provide your answer in the asked language.
Also you may be provided with the previous query and a summary of your answer to use them like a memory of past interactions.
You can neglect them if you see that the answer of the current query doesn't need them.

Your answer:\n\n
"""

# Reusable template; filled per chunk in main() via prompt.format(...).
prompt = PromptTemplate(input_variables=["i","input", "question"], template=question_prompt_template)
31
+
32
async def get_answer(question):
    """Send *question* to the GPT-4 backend, falling back to the GPT-3 one.

    Args:
        question (str): Fully formatted prompt text.

    Returns:
        str: The model's answer.  If both backends fail, an info message is
        shown and ``st.stop()`` halts the Streamlit script (no return).
    """
    try:
        return await gpt4.Completion().create(question)
    except Exception:
        # Was a bare `except:` -- that also swallowed SystemExit /
        # KeyboardInterrupt / CancelledError.  Catch Exception only, then
        # try the secondary backend before giving up.
        try:
            return await gpt3.Completion().create(question)
        except Exception:
            st.info('Service may be stopped or you are disconnected with internet. Feel free to open an issue here "https://github.com/Mohamed01555/VideoQuERI"')
            st.stop()
44
+
45
def img_to_bytes(img_path):
    """Read the file at *img_path* and return its contents base64-encoded."""
    raw = Path(img_path).read_bytes()
    return base64.b64encode(raw).decode()
49
+
50
def main():
    """Streamlit entry point: page config, nav bar, sidebar caption
    generation, and the chat loop that queries GPT per transcript chunk."""
    # setup streamlit page
    st.set_page_config(
        page_title="VideoQuERI",
        page_icon="vqueri.jpeg")

    # Horizontal navigation bar (Home / FAQs / Contact / Donate).
    option = option_menu(
        menu_title=None,
        options=["Home", "FAQs", "Contact", "Donate"],
        icons=["house-check", "patch-question-fill", "envelope","currency-dollar"],
        orientation='horizontal',
        styles={
            "container": {"padding": "0!important", "background-color": "#333"},
            "icon": {"color": "orange", "font-size": "25px"},
            "nav-link": {"font-size": "25px", "text-align": "left", "margin":"0px", "--hover-color": "#ff9900"},
            "nav-link-selected": {"background-color": "#6c757d"},
        }
    )

    # Background CSS and header HTML come from utils (star import).
    st.markdown(page_bg_img, unsafe_allow_html=True)
    st.markdown(html_code, unsafe_allow_html=True)

    # initialize responses.  Chat history: list of {'role', 'content'} dicts.
    if "responses" not in st.session_state:
        st.session_state.responses = []

    # initialize caption.
    if "caption" not in st.session_state:
        st.session_state.caption = None

    # initialize test_splitter.
    # NOTE(review): this assigns a LOCAL `text_splitter`, it never writes
    # st.session_state.text_splitter -- the branch has no lasting effect;
    # looks like a latent bug, confirm intent.
    if "text_splitter" not in st.session_state:
        text_splitter = None

    # Initialize session state variables.  Cache: url -> caption documents.
    if 'captions' not in st.session_state:
        st.session_state.captions = {}

    # initialize chunks.
    if "chunks" not in st.session_state:
        st.session_state.chunks = None

    if "button_pressed" not in st.session_state:
        st.session_state.button_pressed = False

    if "chosen_chunks" not in st.session_state:
        st.session_state.chosen_chunks = []

    if "prev_qa" not in st.session_state:
        st.session_state.prev_qa = None

    if 'video_url_list' not in st.session_state:
        st.session_state.video_url_list = []

    if "question" not in st.session_state:
        st.session_state.question = None

    if "chosen_radio" not in st.session_state:
        st.session_state.chosen_radio = None

    # Set the maximum number of stored captions
    MAX_CAPTIONS = 10

    with st.sidebar:
        video_url = st.text_input("**Paste the video url here:**")

        help_slider= "Processing the entire video in a single iteration might be beyond the capability of GPT.\
        So we split it in chunks. Please choose the desired chunk size. The bigger the chunk size is, the more precise the answer you get."
        selected_value = st.slider("Select a value for chunk size", min_value=100, max_value=3000, value=1500, step=1, help=help_slider)

        help_button = "Creating captions from scratch for a video lasting one hour typically requires approximately 2 minutes.\n \
        In the event of the server experiencing a high volume of requests, the caption generation process could become significantly delayed.\
        If this occurs, we recommend revisiting at a different time. Alternatively, if you already possess the caption, please feel free to provide it below."

        if st.button("Generate the Caption...", help = help_button):
            st.session_state.button_pressed = True
            if (video_url.strip().startswith('http') or video_url.strip().startswith('https')):
                with st.spinner("Generating the video Caption..."):
                    # Cache miss: build (and possibly cache) the caption.
                    if video_url not in st.session_state.captions.keys():
                        st.session_state.caption, ret = get_transcript(video_url)

                        if st.session_state.caption:
                            # Only whisper-generated captions are cached.
                            if ret == 'return_from_whisper':
                                st.session_state.captions[video_url] = st.session_state.caption
                            text_splitter = TokenTextSplitter(chunk_size = selected_value, chunk_overlap=11)
                            st.session_state.chunks = text_splitter.split_documents(st.session_state.caption)

                            #add the url to the list to ensure whether i will provide a summary of perious qa
                            st.info("Caption was generated successfully. You can ask now.")

                        else:
                            st.info('Most likely it is not a video, Or caption eneration service if full now. Please try again later')
                            st.stop()
                    else:
                        # Cache hit: reuse caption, re-split at current size.
                        st.session_state.caption = st.session_state.captions[video_url]
                        text_splitter = TokenTextSplitter(chunk_size = selected_value, chunk_overlap=11)
                        st.session_state.chunks = text_splitter.split_documents(st.session_state.caption)

                        #add the url to the list to ensure whether i will provide a summary of perious qa
                        st.info("Caption was generated successfully. You can ask now")


                # Limit the number of stored captions
                if len(st.session_state.captions) > MAX_CAPTIONS:
                    oldest_url = next(iter(st.session_state.captions))
                    st.session_state.captions.pop(oldest_url)

            else:
                st.info('Valid URL must start with `http://` or `https://` ')
                st.stop()

        if st.session_state.button_pressed:
            # Build a per-chunk time-range summary for the Info expander.
            t=''
            for c,doc in enumerate(st.session_state.chunks):
                start, end = extract_start_end_time(doc.page_content)
                if start is not None and end is not None:
                    t += f'Chunk {c+1} : from {start} to {end}\n\n'
            with st.expander('**Info :information_source:**'):
                st.info(
                    f'Number of Chunks : {len(st.session_state.chunks)}\n\n{t}'
                )

            with st.expander("**If your query is about specific chunks, please choose them** :slightly_smiling_face:"):

                # One checkbox per chunk; chosen_chunks holds 1-based indices.
                st.session_state.chosen_chunks = []
                for i in range(len(st.session_state.chunks)):
                    chosen_chunk = st.checkbox(label= str(i+1))
                    if chosen_chunk:
                        st.session_state.chosen_chunks.append(i + 1)

                if st.session_state.chosen_chunks:
                    st.info(f"Selected Chunks: {st.session_state.chosen_chunks}")

            st.session_state.chosen_radio = st.radio("Do you wnat to add some sort of memory?", ['No', 'Yes'], help="Note that it is not that accurate memory")

    if option == 'Home':
        # Replay chat history.
        for response in st.session_state.responses:
            with st.chat_message(response['role']):
                st.markdown(response['content'], unsafe_allow_html=True)


        st.session_state.question = st.chat_input('Your Query...')
        if st.session_state.question:
            if not st.session_state.button_pressed:
                st.info("You forgot to enter your Video URL and click *Generate the Caption...* button.")
                st.stop()

            with st.chat_message('user'):
                st.markdown(st.session_state.question,unsafe_allow_html=True)

            st.session_state.responses.append({'role':"user", 'content': st.session_state.question})

            with st.chat_message('assistant'):
                st.session_state.message_placeholder = st.empty()
                full_response = ''
                #if the user entered specefic chunks to query about
                if len(st.session_state.chosen_chunks) != 0:
                    for c in st.session_state.chosen_chunks:
                        doc = st.session_state.chunks[c-1]
                        # full_response = answer(chunk_number=c, doc = doc, question = question)
                        query = prompt.format(i = c, input = doc.page_content, question = st.session_state.question)

                        # Append the memory only when still on the same video;
                        # IndexError on an empty url list means no history yet.
                        try:
                            if video_url == st.session_state.video_url_list[-1]:
                                query += st.session_state.prev_qa if st.session_state.prev_qa else ''
                        except:
                            query = query
                        start, end = extract_start_end_time(doc.page_content)
                        if start is not None and end is not None:
                            with st.spinner(f"Searching for the answer in the period {start} --> {end}"):
                                ai_response = run(get_answer(query))
                                ai_response_decoded = decode_unicode(ai_response)
                                time_ = f"""<span style="color: #00FF00;">Answer in the period <span style="color: #800080;">{start} --> {end}</span> is \n\n</span>"""
                                full_response += '\n' + time_ + '\n'+ ai_response_decoded + '\n'

                                st.session_state.message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)


                        else:
                            ai_response = run(get_answer(query))
                            ai_response_decoded = decode_unicode(ai_response)
                            full_response += '\n\n' + ai_response_decoded + '\n\n'

                            st.session_state.message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)


                #if the user did not entered specefic chunks, use all chunks
                else:
                    for c,doc in enumerate(st.session_state.chunks):
                        # full_response = answer(chunk_number=c+1, doc = doc, question = question)
                        query = prompt.format(i = c+1, input = doc.page_content, question = st.session_state.question)

                        try:
                            if video_url == st.session_state.video_url_list[-1]:
                                query += st.session_state.prev_qa if st.session_state.prev_qa else ''
                        except:
                            query = query

                        start, end = extract_start_end_time(doc.page_content)
                        if start is not None and end is not None:
                            with st.spinner(f"Searching for the answer in the period {start} --> {end}"):
                                ai_response = run(get_answer(query))

                                ai_response_decoded = decode_unicode(ai_response)
                                time = f"""<span style="color: #00FF00;">Answer in the period <span style="color: #800080;">{start} --> {end}</span> is \n\n</span>"""
                                full_response += '\n' + time + '\n'+ ai_response_decoded + '\n'

                                st.session_state.message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)

                        else:
                            ai_response = run(get_answer(query))
                            ai_response_decoded = decode_unicode(ai_response)
                            full_response += '\n' + ai_response_decoded

                            st.session_state.message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)

                # Final render without the typing cursor.
                st.session_state.message_placeholder.markdown(full_response, unsafe_allow_html=True)

            if st.session_state.chosen_radio == 'Yes':
                # get a summary of the answer and append before the next question
                summary_prompt = f"""
                Please summarize this in 100 to 200 words as a mximum.
                Retain any programming code present, even if doing so exceeds the 200-word limit.
                Capture the entites if exist\n{full_response}
                """
                summary = run(get_answer(summary_prompt))
                st.session_state.prev_qa = f"This is the previous question: {st.session_state.question}\nand this is the summary of your answer: {summary}"


            st.session_state.video_url_list.append(video_url)

            st.session_state.responses.append({'role' : 'assistant', 'content' : full_response})

    elif option == 'FAQs':
        FAQs()
    elif option == 'Contact':
        contact()
    else:
        donate()

if __name__ == '__main__':
    main()
gpt3.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from aiohttp import ClientSession, ClientError
2
+ from json import loads, JSONDecodeError
3
+
4
+
5
class Completion:
    """
    This class provides methods for generating completions based on prompts.
    """

    async def create(self, prompt):
        """
        Create a new completion based on the given prompt.

        Streams the SSE response from the codelink.io chat endpoint and
        concatenates every delta's ``content`` field.

        Args:
            prompt (str): The prompt to generate a completion for.

        Returns:
            str: The generated completion.

        Raises:
            Exception: If unable to fetch the response (original cause
            chained via ``from``).
        """
        try:
            async with ClientSession() as session:
                async with session.post(
                    "https://ava-alpha-api.codelink.io/api/chat",
                    headers={"Content-Type": "application/json"},
                    json={
                        "model": "gpt-4",
                        "temperature": 0.6,
                        "stream": True,
                        "messages": [
                            {
                                "role": "system",
                                "content": "You are Ava, an AI assistant.",
                            },
                            {"role": "user", "content": prompt},
                        ],
                    },
                    timeout=45,
                ) as resp_obj:
                    resp = ""
                    async for line in resp_obj.content:
                        line_text = line.decode("utf-8").strip()
                        if not line_text.startswith("data:"):
                            continue
                        data = line_text.split("data:")[1]
                        try:
                            data_json = loads(data)
                        except JSONDecodeError:
                            # Keep-alive / "[DONE]" sentinel lines are not JSON.
                            continue
                        for choice in data_json.get("choices", []):
                            if choice.get("finish_reason") == "stop":
                                break
                            delta = choice.get("delta", {})
                            if "content" in delta:
                                resp += delta["content"]
                    return resp
        except Exception as err:
            # Was a bare `except:` with no cause -- that masked the real
            # error and swallowed SystemExit/KeyboardInterrupt too.
            raise Exception("Unable to fetch the response.") from err
gpt4.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from uuid import uuid4
2
+ from re import findall
3
+ import tls_client
4
+
5
class Completion:
    async def create(self, prompt):
        """
        Create a completion for the given prompt using the you.com API.

        Args:
            prompt (str): The prompt for which completion is requested.

        Returns:
            str: The completion result as a string.

        Raises:
            Exception: If unable to fetch the response or the required token from the response.
        """
        # NOTE(review): despite `async def`, nothing here awaits -- the
        # tls_client call blocks the event loop; confirm this is acceptable.
        client = tls_client.Session(client_identifier="chrome_108")
        # Browser-like headers; a fresh guest uuid cookie is minted per call.
        client.headers = {
            "authority": "you.com",
            "accept": "text/event-stream",
            "accept-language": "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3",
            "cache-control": "no-cache",
            "referer": "https://you.com/search?q=who+are+you&tbm=youchat",
            "sec-ch-ua": '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "cookie": f"safesearch_guest=Off; uuid_guest={str(uuid4())}",
            "user-agent": "Mozilla/5.0 (Windows NT 5.1; U; ; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.52",
        }
        params = {
            "q": prompt,
            "page": 1,
            "count": 10,
            "safeSearch": "Off",
            "onShoppingPage": False,
            "mkt": "",
            "responseFilter": "WebPages,Translations,TimeZone,Computation,RelatedSearches",
            "domain": "youchat",
            "queryTraceId": str(uuid4()),
            "chat": [],
        }
        resp = client.get(
            "https://you.com/api/streamingSearch", params=params, timeout_seconds=30
        )

        # Debug output left in place -- prints the full response body to stdout.
        print("Response Status Code:", resp.status_code)
        print("Response Text:", resp.text)

        if "youChatToken" not in resp.text:
            raise Exception("Unable to fetch response.")
        # Concatenate every streamed youChatToken fragment and un-escape it.
        return (
            "".join(findall(r"{\"youChatToken\": \"(.*?)\"}", resp.text))
            .replace("\\n", "\n")
            .replace("\\\\", "\\")
            .replace('\\"', '"')
        )
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio_client==0.4.0
2
+ langchain==0.0.268
3
+ tiktoken==0.4.0
4
+ pytube==15.0.0
5
+ streamlit==1.25.0
6
+ streamlit_option_menu==0.3.6
7
+ youtube_transcript_api==0.6.1
8
+ yt_dlp==2023.3.4
9
+ tls-client == 0.2.1
utils.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ import streamlit as st
3
+ from langchain.docstore.document import Document
4
+ from langchain.text_splitter import TokenTextSplitter
5
+ import re
6
+ import base64
7
+ from whisper_result import *
8
+
9
def postprocess_time_if_transcript_was_already_generated(time):
    """Format a start offset in seconds as 'h:m:s' (or 'm:s' under an hour,
    '0:s' under a minute).  Fields are not zero-padded."""
    total = int(time)
    if total < 60:
        return f'0:{total}'

    hours, remainder = divmod(total, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours == 0:
        return f'{minutes}:{seconds}'
    return f'{hours}:{minutes}:{seconds}'
21
+
22
def ret_trans(vid):
    """Fetch the best transcript for video id *vid*.

    Preference order: a track whose language code contains 'en', then any
    track translatable to English, then the first available track.

    Bug fixed: the original if/elif/else all returned on the FIRST transcript
    in the list, so an English track listed second was never found and the
    preference order was effectively meaningless.

    Returns:
        The fetched transcript data, or None when no transcripts exist.
    """
    # retrieve the available transcripts
    transcript_list = YouTubeTranscriptApi.list_transcripts(vid)
    transcripts = list(transcript_list)

    # 1) a track already in English
    for transcript in transcripts:
        if 'en' in transcript.language_code:
            return transcript.fetch()

    # 2) a track that can be machine-translated to English
    for transcript in transcripts:
        if transcript.is_translatable and 'en' in [t['language_code'] for t in transcript.translation_languages]:
            return transcript.translate('en').fetch()

    # 3) fall back to the first available track, if any
    for transcript in transcripts:
        return transcript.fetch()
36
+
37
def get_generated_transcript(video_url):
    """Fetch an existing YouTube transcript and flatten it into one Document.

    Bug fixed: ``video_url.split('=')[1]`` truncated the id at the next '='
    and kept trailing '&...' junk for URLs such as
    ``...watch?v=ID&t=10`` -- parse the ``v`` query parameter properly.

    Returns:
        list[Document]: a single Document whose page_content is
        '<time> <text>' entries joined by ', '.
    """
    from urllib.parse import urlparse, parse_qs

    query = parse_qs(urlparse(video_url).query)
    # Fall back to the old behavior for non-standard URLs.
    video_id = query['v'][0] if 'v' in query else video_url.split('=')[1]
    res = ret_trans(video_id)

    transcript = ', '.join([f"{postprocess_time_if_transcript_was_already_generated(t['start'])} {t['text']}" for t in res])
    transcript = [Document(page_content=transcript)]

    return transcript
45
+
46
def extract_start_end_time(passage):
    """Return the (second, second-to-last) 'm:s' / 'h:m:s' timestamps in
    *passage*, or (None, None) when fewer than two are present.

    NOTE(review): the first and last matches are intentionally skipped --
    presumably because chunks overlap (chunk_overlap=11); confirm.
    """
    matches = re.findall(r'\d{1,2}:\d{1,2}(?::\d{1,2})?', passage)
    if len(matches) < 2:
        return None, None
    return matches[1], matches[-2]
58
+
59
def decode_unicode(text):
    """Expand literal escape sequences (e.g. '\\u00e9', '\\n') in *text*.

    NOTE(review): unicode-escape decoding mangles non-ASCII characters that
    are already decoded; safe only while the input is ASCII-with-escapes --
    confirm upstream.
    """
    return text.encode("utf-8").decode("unicode-escape")
61
+
62
def get_transcript(video_url):
    """Return ``([Document], source_tag)`` for *video_url*.

    First tries YouTube's existing transcript; on ANY failure falls back to
    whisper-jax generation.  Returns ``(False, '')`` when the fallback also
    yields nothing.
    """
    try: #if the transcript was alrady generated
        transcript = get_generated_transcript(video_url)
        return transcript, 'return_from_generated_transcript'
    except:
        # Deliberate best-effort: any error (non-YouTube URL, no captions,
        # network) falls through to whisper generation.
        st.info("Looks like the provided video does not have transcription. Plese be patient until transcription is generated.")
        # `time` is in scope via `from whisper_result import *`.
        s = time.time()
        transcript = get_whisper_result(video_url)
        if transcript:
            st.info(f"Generating Caption took {round(time.time() - s, 2)} seconds")
            return [Document(page_content=transcript)], 'return_from_whisper'

        else:
            return False, ''
76
+
77
# Define your FAQ questions and answers
def FAQs():
    """Render the FAQ page: one ``st.write`` per question/answer pair.

    Values mix Markdown and raw HTML, hence ``unsafe_allow_html=True``.
    Trailing backslashes inside quoted answers are line continuations and
    part of the displayed text's spacing -- not code structure.
    """
    faq = {
        "What is VideoQuERI?":"It is a versatile and interactive website that utilizes AI capabilities to process videos, answer questions, generate code, solve puzzles, and perform mathematical operations.\
        It depends that the video is described in someone's voice not visually. If the video's description is solely visual, the algorithm will not function effectively.",

        # The value below is a series of adjacent string literals -- Python
        # concatenates them implicitly into one long HTML list.
        "What are the Capabilities of VideoQuERI?<ul>" :
        " <li>**Video Processing**: Users can input video URLs to your website. The AI can then process these videos to extract information, such as speech recognition for transcriptions.</li>"
        " <li>**Question Answering**:Users can ask questions related to the video's content. The website's AI can analyze the video's transcriptions and content to provide relevant answers to users' questions.</li>"
        " <li>**Code Generation**: If the video contains step-by-step instructions for coding, AI can extract these instructions and generate code snippets.</li>"
        " <li>**Generating Chapters**: You can ask the bot to help you splitting your video to chapters.</li>"
        " <li>**Puzzle Solving**: Videos with puzzle verbally instructions can be processed by the AI to understand the rules and mechanics. Users can input puzzle-specific queries, and it can provide solutions or hints.</li>"
        " <li>**Memory**: Chatbot has memory to retain and recall information from previous parts of the conversation. But,honestly, it is not that strong.</li>"
        " <li>**Information Retrieval** : If you forget when a piece of information was said, you can provide the video and your question.</li>"
        " <li>**Educational Content**: Your website can serve as an educational platform by offering explanations, demonstrations, and tutorials on various subjects based on the video content.</li>"
        " <li>**Natural Language Understanding**: The AI can understand and analyze the natural language used in both the video's transcriptions and user queries. This allows for more contextually accurate responses.</li>"
        " <li>**Interactive UI**: Your website's user interface can incorporate elements like text input fields, and result displays to make the interactions intuitive and engaging.</li>"
        " <li>**Scalability**: The AI-driven capabilities can be applied to various types of videos, making your website versatile and adaptable to different content.</li> </ul> "
        ,

        "What if the user has already generated transcription (e.g. from platforms like Coursera or Udemy)?":
        "You can copy it and ask ChatGPT or Poe",

        "what if Caption generation took a long time?":"There are two propable reasons. First, the video url is not supported. Second, the transcription generation API has too many requuests\
        If the first case, then the video may be streamed to wesite in .ts format , and .ts is not supported .However,if your case is the second case, you can visit the us after a period of time.",

        "What if the video is in your local machine?":"You can Upload it to your google drive and then share the link with us.",

        "What are supported formats?" :
        "However, most video formats are supported, streaming videos in the .ts format (Transport Stream) are currently not compatible with our system.\
        Transport Stream is a container format often used for streaming live broadcasts and might require specialized processing.\
        If you have a .ts format video, you might consider converting it to a supported format using 'ffmpeg' and upload it to your drive and share the link with us.\
        We appreciate your understanding and are here to assist you with any questions you may have! ",

        "How can I get the video link?":
        """You should install this <a href="https://chrome.google.com/webstore/detail/video-downloadhelper/lmjnegcaeklhafolokijcfjliaokphfk?hl=es">chrome extension</a>, \
        <a href = "https://addons.mozilla.org/en-US/firefox/addon/video-downloadhelper/">firefox extension</a>.\
        If you are in the webpage that has the desired video click on the extension logo , a menu will be listed , click copy url, finally paste in the video url input field.
        """ ,

        "What languages are supported?" :
        "Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French,\
        Galician, German, Greek, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian, Macedonian, Malay, Marathi,\
        Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, and Welsh.",

        "Is there a tip to get the most out of VideoQuERI":"Yes, you should ask your question in English and ask the bot to answer in your favourite language(e.g. What is this video about? answer in 'arabic').",

        "What is the purpose of the video URL field?":
        "The video URL field allows you to input the URL of the video you want to query.Our system will analyze the video content to provide relevant answers.",

        "How do I input a video URL, especially for platforms like Facebook or embedded videos?":
        "To input a video URL, simply copy the URL of the video you want to query and paste it into the video URL field.",

        "What is the chunk size slider for?":
        "The chunk size slider lets you adjust the size of video segments that the system analyzes. This can help you get more focused and precise answers based on specific parts of the video.",

        "How does the system generate answers from the video?":
        "Our system uses advanced AI technology to analyze the video's audio content. It then generates answers based on the context and content of the video.",

        "Is there a limit to the video length I can query?":
        "While there's generally no strict limit on video length, very long videos might take longer to process. It's recommended to choose appropriate chunk sizes for efficient processing and accurate answers.",

        "Can I change the chunk size while the video is being processed?":
        "No, you can adjust the chunk size slider after generating the caption then click `Generate the Caption` button again . This allows you to explore different parts of the video and get answers for specific segments.",

        "Can I ask questions while the caption is being generated?":
        "No, you can ask questions after the caption generation is completed.",

        "How accurate are the answers generated from the video?":
        "The accuracy of answers depends on various factors such as the clarity of the audio, and the specificity of your questions. Generally, the system strives to provide relevant and coherent answers.",

        "Can I save or bookmark specific answers from the video?":
        "At the moment, the system doesn't offer direct saving or bookmarking of answers. However, you can take screenshots or notes to keep track of important information.",

        "Are there certain types of videos that work better with this feature?":
        "The system is designed to work with a wide range of videos, but videos with clear audio tend to yield better results. Educational, instructional, and well-structured videos are usually more suitable."


    }
    # with st.expander("FAQs"):
    for i, faq_key in enumerate(faq.keys()):
        # with st.sidebar.expander(faq_key):
        st.write(f"**Q{i+1}. {faq_key}**\n \n**Answer** : {faq[faq_key]}", unsafe_allow_html=True)
        st.write('-'*50)
161
+
162
def contact():
    """Render the 'Contact' page: email and LinkedIn links as raw HTML."""
    mail = """<h2><a href="mailto:[email protected]">Email</a></h2>"""
    linkedin = """<h2><a href="https://www.linkedin.com/in/mohamed-algebali-213672173/">Linkedin</a></h2>"""

    con = f"""
    <h1>We can contact via :
    <ul>
    <li>{mail}</li>
    <li>{linkedin}</li>
    </ul>
    </h1>
    """
    st.markdown(con, unsafe_allow_html=True)
175
+
176
def donate():
    """Placeholder for the 'Donate' page -- intentionally renders nothing."""
    pass
178
+
179
def get_img_as_base64(file):
    """Return the contents of *file* base64-encoded as an ASCII string."""
    with open(file, "rb") as handle:
        payload = handle.read()
    return base64.b64encode(payload).decode()
183
+
184
# Logo file shipped with the app; read at import time and inlined as base64.
logo='vqueri.jpeg'
img = get_img_as_base64(logo)

# CSS injected with st.markdown(..., unsafe_allow_html=True).  Doubled braces
# escape literal CSS braces inside the f-string; lines beginning with '#'
# inside the style block are disabled rules kept for reference.
page_bg_img = f"""
<style>
[data-testid="stAppViewContainer"] > .main {{
# background-image: url("data:image/jpeg;base64,{img}");
# background-size: auto;
# # opacity:0.8;
# background-position: center;
# background-repeat: no-repeat;
# background-attachment: local;

}}

[data-testid="stSidebar"] > div:first-child {{
# background-image: url("data:image/jpeg;base64,{img}");
background-position: center;
background-repeat: no-repeat;
background-attachment: fixed;
}}

[data-testid="stHeader"] {{
margin-top: 0px;

background: rgba(0,0,0,0);
}}

[data-testid="stToolbar"] {{
right: 2rem;
}}

[data-testid="stExpander"]{{
margin-top:50px
}}


[data-testid="stVerticalBlock"]{{
margin-top: -5px;
# margin-top:30px
}}
</style>
"""

# Page header: title plus the base64 logo, centered with flexbox.
html_code = """
<div style="display: flex;justify-content: center;align-items: center;">
<h1 style='text-align: center; color: '#6c757d';'>VideoQuERI, Chatting with Videos made Easy</h1>
<img style="width: 150px; margin-right: 10px; border-radius:50px "" src="data:image/jpeg;base64,{}" alt="Image Description">
</div>
""".format(img)
234
+
235
+
vqueri.jpeg ADDED
whisper_result.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import streamlit as st
3
+ from gradio_client import Client
4
+ import yt_dlp
5
+ import pytube
6
+ from pytube.exceptions import VideoUnavailable
7
+
8
# Shared Gradio client for the public whisper-jax Space; created once at
# import time so every transcription request reuses it.
client = Client("https://sanchit-gandhi-whisper-jax.hf.space/")
9
+
10
def get_whisper_res_if_the_video_is_youtube_video(video_url, max_recursion_time=20):
    """Transcribe a YouTube video via the whisper-jax Space, with retries.

    Tries the Space's three YouTube endpoints in order each round; after a
    fully failed round it sleeps 2s and retries until *max_recursion_time*
    seconds have elapsed, then gives up.

    Args:
        video_url (str): YouTube watch URL.
        max_recursion_time (int): overall time budget in seconds (the name
            is historical -- there is no recursion here).

    Returns:
        The Space's prediction result, or None on timeout.
    """
    start_time = time.time()  # Get the start time
    # The same three arguments work for every endpoint; only the endpoint
    # selector differs.  (Was three nested, duplicated try/bare-except blocks.)
    endpoints = [{"fn_index": 6}, {"fn_index": 7}, {"api_name": "/predict_2"}]
    while True:
        for endpoint in endpoints:
            try:
                return client.predict(
                    video_url,    # str in 'YouTube URL' Textbox component
                    "translate",  # str in 'Task' Radio component
                    True,         # bool in 'Return timestamps' Checkbox component
                    **endpoint)
            except Exception:
                continue  # try the next endpoint this round
        if time.time() - start_time > max_recursion_time:
            return None
        time.sleep(2)
+
51
def get_whisper_res_if_the_video_is_not_youtube_video(video_url, max_recursion_time = 20):
    """Transcribe a non-YouTube media URL via the whisper-jax Space.

    Tries the audio endpoints '/predict' then '/predict_1' each round; after
    *max_recursion_time* seconds of failures it falls back once to the
    YouTube endpoint helper and returns whatever that yields.

    Args:
        video_url (str): direct media URL or file path.
        max_recursion_time (int): overall time budget in seconds.

    Returns:
        The Space's prediction result, or None if every route fails.
    """
    start_time = time.time()  # Get the start time
    # (Was two nested, duplicated try/bare-except blocks.)
    while True:
        for api_name in ("/predict", "/predict_1"):
            try:
                return client.predict(
                    video_url,    # str (filepath or URL to file) in 'inputs' Audio component
                    "translate",  # str in 'Task' Radio component
                    True,         # bool in 'Return timestamps' Checkbox component
                    api_name=api_name)
            except Exception:
                continue
        if time.time() - start_time > max_recursion_time:
            # Last resort: maybe it is actually a YouTube link after all.
            return get_whisper_res_if_the_video_is_youtube_video(video_url)
        time.sleep(2)
78
+
79
def postprocess_timestamps(result, index):
    """Rewrite whisper-jax timestamp prefixes into compact 'm:s text' lines.

    ``result[index]`` holds newline-separated lines shaped like
    '[00:01.000 -> 00:05.000] text' (24-char bracket prefix) or, with hours,
    '[00:00:01.000 -> 00:00:05.000] text' (30-char prefix).
    """
    rewritten = []
    for line in result[index].split('\n'):
        stamp = line.split(' -> ')[0][1:]  # start time, bracket stripped
        fields = stamp.split(':')
        if len(fields) == 2:  # only minutes and seconds
            minutes = int(fields[0])
            seconds = int(float(fields[1]))
            # Keep everything from the first space after the 24-char prefix.
            cut = 24 + line[24:].find(' ')
            rewritten.append(f"{minutes}:{seconds}" + line[cut:])
        else:  # hours present
            hours = int(fields[0])
            minutes = int(fields[1])
            seconds = int(float(fields[2]))
            rewritten.append(f"{hours}:{minutes}:{seconds}" + line[30:])
    return rewritten
+
106
def postprocess_whisper_jax_output(result):
    """Join cleaned timestamp lines into one comma-separated transcript.

    YouTube predictions carry the timestamped text at index 1; other media
    at index 0 -- probe index 1 first and fall back on any failure.
    """
    try:
        cleaned = postprocess_timestamps(result, index=1)
    except:
        cleaned = postprocess_timestamps(result, index=0)
    return ', '.join(cleaned)
114
+
115
def get_audio_info(url):
    """Return yt-dlp's metadata dict for *url* without downloading media."""
    with yt_dlp.YoutubeDL() as ydl:
        info = ydl.extract_info(url, download = False)

    return info
120
+
121
def get_whisper_result(video_url):
    """Produce a timestamped transcript string for *video_url*, or False.

    YouTube links are checked for availability with pytube first; any other
    URL goes straight to the generic audio endpoints.
    """
    # if the link is youtube video
    if 'youtube.com/watch?v=' in video_url:
        try:
            video = pytube.YouTube(video_url)
            video.check_availability()

            result = get_whisper_res_if_the_video_is_youtube_video(video_url)
            # NOTE(review): if the Space timed out, `result` is None and
            # postprocess_whisper_jax_output will raise here (only
            # VideoUnavailable is caught below) -- confirm intended.
            transcript = postprocess_whisper_jax_output(result)
            return transcript

        except VideoUnavailable:
            return False

    # if the link is not a youtube video
    else:
        result = get_whisper_res_if_the_video_is_not_youtube_video(video_url)
        if result:
            transcript = postprocess_whisper_jax_output(result)
            return transcript

        return False