# Scraped Hugging Face Spaces page header (not part of the program).
# Space status: Runtime error
# File size: 4,202 Bytes
# Commits: c044854 5e5262d c044854 95ba4a0 c044854
# (Line-number gutter 1-158 of the page's code viewer omitted.)
from llama_index import Document, GPTListIndex, GPTSimpleVectorIndex
import gradio as gr
import openai
import os
from pytube import YouTube
def download_yt_video(ytlink):
    """Download the first audio-only stream of a YouTube video as an ``.mp3`` file.

    The stream is saved into the current working directory and the downloaded
    file is then renamed so that its extension becomes ``.mp3``. Only the file
    name changes; the audio data itself is not re-encoded.

    Args:
        ytlink: URL of the YouTube video.

    Returns:
        The path of the renamed ``.mp3`` file on success. On any failure the
        caught exception object is *returned* (not raised), so callers must
        check the result with ``isinstance(result, Exception)``.
    """
    try:
        yt = YouTube(ytlink)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # first() gives None when nothing matches the filter; report that
            # explicitly instead of failing below with an opaque AttributeError.
            return ValueError(f"No audio-only stream found for {ytlink!r}.")
        out_file = audio_stream.download(output_path="./")
        base, _ = os.path.splitext(out_file)
        new_file = base + '.mp3'
        # os.replace overwrites a leftover file from an earlier run on every
        # platform, whereas os.rename raises FileExistsError on Windows.
        os.replace(out_file, new_file)
        return new_file
    except Exception as e:
        # Failures are handed back as a value; see "Returns" above.
        return e
def get_transcript(filename):
    """Transcribe an audio file with the Gladia audio-transcription API.

    The file is uploaded as multipart form data. The ``transcription`` text of
    every entry in the response's ``prediction`` list is joined into a single
    whitespace-normalised string, which is also written to a ``.txt`` file
    next to the audio file.

    Args:
        filename: Path of the audio file to upload.

    Returns:
        The full transcript as a single string.

    Raises:
        OSError: If the audio file cannot be read or the transcript file
            cannot be written.
        requests.HTTPError: If the API answers with an error status code.
        KeyError: If the response JSON lacks the ``prediction`` or
            ``transcription`` fields.
    """
    import requests
    import json

    # SECURITY: an API key is hard-coded here. Prefer supplying it through the
    # GLADIA_API_KEY environment variable; the literal is kept only as a
    # fallback so existing deployments keep working, and should be rotated.
    api_key = (os.environ.get('GLADIA_API_KEY')
               or '70ad5f6e-31e6-4acf-8a15-89c166c4cc9f')
    headers = {
        'accept': 'application/json',
        'x-gladia-key': api_key,
        # requests won't add a boundary if this header is set when you pass files=
        # 'Content-Type': 'multipart/form-data',
    }

    # Keep the upload inside the with-block so the file handle is always closed.
    with open(filename, 'rb') as audio_file:
        files = {
            'audio': (filename, audio_file, 'audio/mpeg'),
            # NOTE(review): this example URL is sent alongside the uploaded
            # file; it looks like a leftover from an API sample request.
            # Confirm which of the two inputs the service transcribes.
            'audio_url': (None, 'http://files.gladia.io/example/audio-transcription/split_infinity.wav'),
            'language': (None, 'english'),
            'language_behaviour': (None, 'manual'),
            'output_format': (None, 'json'),
        }
        response = requests.post(
            'https://api.gladia.io/audio/text/audio-transcription/',
            headers=headers, files=files)

    # Surface API errors as such rather than as a KeyError on 'prediction'.
    response.raise_for_status()
    data = json.loads(response.text)

    # Join all segments, then collapse any run of whitespace to a single space.
    segments = (entry['transcription'] for entry in data['prediction'])
    result = ' '.join(' '.join(segments).split())

    # splitext copes with extensions of any length (the old [:-4] slice did not).
    transcript_path = os.path.splitext(filename)[0] + '.txt'
    # Explicit UTF-8 so non-ASCII transcripts are written on every platform.
    with open(transcript_path, 'w', encoding='utf-8') as f:
        f.write(result)
    return result
def createindex(url, openaikey):
    """Download a video's audio, transcribe it and save a query index.

    Steps: download the audio with ``download_yt_video``, transcribe it with
    ``get_transcript``, delete the downloaded audio file, store the OpenAI key
    in the ``OPENAI_API_KEY`` environment variable, build a ``GPTListIndex``
    from the transcript and save it to ``index.json``.

    Args:
        url: YouTube video link entered by the user.
        openaikey: OpenAI API key entered by the user.

    Returns:
        A status string on success or when ``url`` is empty. If any step
        fails, the exception object is returned (not raised) so that it can
        be shown in the status box.
    """
    # Basic check, in the same spirit as the guards in videoques.
    if not url:
        return "Please enter Youtube Video link."
    try:
        filename = download_yt_video(url)
        if isinstance(filename, Exception):
            # download_yt_video reports failures by returning the exception;
            # pass it on instead of treating it as a file name.
            return filename
        try:
            transcript = get_transcript(filename)
        finally:
            # Remove the downloaded audio even when transcription fails.
            if os.path.exists(filename):
                os.remove(filename)
        # Store openai key in environment
        os.environ['OPENAI_API_KEY'] = openaikey
        # Create index
        index = GPTListIndex([Document(transcript)], chunk_size_limit=2500)
        index_filename = "index.json"
        index.save_to_disk(index_filename)
        return "Video processed. Now you can start querying."
    except Exception as e:
        return e
def videoques(query, openaikey):
    """Answer a question about the processed video using the saved index.

    Args:
        query: The user's question.
        openaikey: OpenAI API key; stored in the ``OPENAI_API_KEY``
            environment variable before the index is loaded.

    Returns:
        A prompt string when ``query`` or ``openaikey`` is empty or when no
        index has been saved yet; otherwise the object returned by
        ``index.query``.
    """
    # Basic Checks
    if not query:
        return "Please enter your query."
    if not openaikey:
        return "Please enter openaikey."
    # Store openai key in environment
    os.environ['OPENAI_API_KEY'] = openaikey
    index_name = "index.json"
    # createindex writes this file; without it there is nothing to query, so
    # tell the user instead of failing with FileNotFoundError.
    if not os.path.exists(index_name):
        return "Please process a video first."
    index = GPTListIndex.load_from_disk(index_name)
    # Query based on index
    response = index.query(query, mode="embedding", similarity_top_k=4)
    return response
def cleartext(query, output):
    """Return two empty strings, one for each text box being cleared.

    Both arguments are ignored; they exist only so the function matches the
    two inputs passed by the Clear buttons.
    """
    blank = ""
    return [blank, blank]
# Two-column Gradio UI: the left column processes a video (link + OpenAI key),
# the right column asks questions about the processed video.
# The Markdown string contents are kept flush-left so that no source
# indentation leaks into the rendered text.
with gr.Blocks() as demo:
    gr.Markdown(
        """
<h1><center><b>Portuguese VideoQues</b></center></h1>
""")
    gr.Markdown(
        """
Portuguese VideoQues answers your queries on any Portuguese video.
""")
    with gr.Row():
        with gr.Column():
            url = gr.Textbox(lines=1, label="Enter Youtube Video link.")
            openaikey = gr.Textbox(lines=1, label="Enter Your OpenAI key.")
            submit1_button = gr.Button("Submit")
            ans1_output = gr.Textbox(label="Status.")
            clear1_button = gr.Button("Clear")
        with gr.Column():
            query = gr.Textbox(lines=2, label="Enter Your Query.")
            submit2_button = gr.Button("Submit")
            ans2_output = gr.Textbox(label="Answer.")
            clear2_button = gr.Button("Clear")
    # Submit button for processing the video; the result goes to the status box.
    submit1_button.click(createindex, inputs=[
        url, openaikey], outputs=[ans1_output])
    # Submit button for submitting query.
    submit2_button.click(videoques, inputs=[
        query, openaikey], outputs=[ans2_output])
    # Clear button for clearing the video link and status.
    clear1_button.click(cleartext, inputs=[
        url, ans1_output], outputs=[url, ans1_output])
    # Clear button for clearing query and answer.
    clear2_button.click(cleartext, inputs=[query, ans2_output], outputs=[
        query, ans2_output])
demo.launch(debug=True)
# (end of scraped page content)