File size: 4,202 Bytes
c044854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e5262d
c044854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95ba4a0
 
c044854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from llama_index import Document, GPTListIndex, GPTSimpleVectorIndex
import gradio as gr
import openai
import os
from pytube import YouTube


def download_yt_video(ytlink):
    """Download the audio track of a YouTube video and save it as ``.mp3``.

    The first audio-only stream reported by pytube is downloaded into the
    current working directory and the resulting file is then renamed so that
    it carries an ``.mp3`` extension (the audio data is only renamed, not
    re-encoded).

    Args:
        ytlink: URL of the YouTube video.

    Returns:
        The path of the renamed ``.mp3`` file on success.  On any failure the
        caught exception object is returned instead of being raised, so
        callers must check the type of the result before using it as a path.
    """
    try:
        yt = YouTube(ytlink)

        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # No stream matched the filter; fail with a readable message
            # instead of an AttributeError on ``None.download``.
            raise ValueError(
                "No audio-only stream is available for this video.")

        out_file = audio_stream.download(output_path="./")

        base, _ext = os.path.splitext(out_file)
        new_file = base + '.mp3'

        # os.replace overwrites an existing target on every platform, whereas
        # os.rename fails on Windows when a stale .mp3 is already present.
        os.replace(out_file, new_file)

        return new_file
    except Exception as e:
        return e


def get_transcript(filename):
    """Transcribe an audio file with the Gladia API and save the text to disk.

    The file is uploaded as multipart form data, the ``transcription`` fields
    of every entry in the response's ``prediction`` list are joined into one
    whitespace-normalised string, and that string is written next to the
    audio file with a ``.txt`` extension.

    Args:
        filename: Path of the audio file to upload.

    Returns:
        The full transcript as a single string.

    Raises:
        OSError: If the audio file cannot be read or the transcript file
            cannot be written.
        requests.HTTPError: If the API answers with an error status.
        KeyError: If the response JSON lacks the expected fields.
    """
    import json
    import os

    import requests

    # SECURITY: an API key is committed in source control below.  Prefer
    # supplying it through the GLADIA_API_KEY environment variable; the
    # literal is kept only as a fallback so existing deployments keep working.
    api_key = os.environ.get(
        'GLADIA_API_KEY', '70ad5f6e-31e6-4acf-8a15-89c166c4cc9f')

    headers = {
        'accept': 'application/json',
        'x-gladia-key': api_key,
        # requests won't add a boundary if this header is set when you pass files=
        # 'Content-Type': 'multipart/form-data',
    }

    # Keep the upload inside a ``with`` block so the file handle is closed
    # even when the request fails.
    with open(filename, 'rb') as audio_file:
        files = {
            'audio': (filename, audio_file, 'audio/mpeg'),
            # NOTE(review): this example URL is sent together with the
            # uploaded file; confirm which of the two the API actually uses.
            'audio_url': (None, 'http://files.gladia.io/example/audio-transcription/split_infinity.wav'),
            # NOTE(review): language is forced to English although the UI
            # advertises Portuguese videos - confirm this is intended.
            'language': (None, 'english'),
            'language_behaviour': (None, 'manual'),
            'output_format': (None, 'json'),
        }

        response = requests.post(
            'https://api.gladia.io/audio/text/audio-transcription/', headers=headers, files=files)

    # Surface HTTP errors directly rather than as a confusing KeyError below.
    response.raise_for_status()

    data = json.loads(response.text)

    segments = (entry['transcription'] for entry in data['prediction'])
    # split()/join collapses every run of whitespace into a single space.
    result = ' '.join(' '.join(segments).split())

    # splitext copes with extensions of any length, unlike slicing off the
    # last four characters.
    transcript_path = os.path.splitext(filename)[0] + '.txt'
    # Explicit UTF-8 so accented characters do not depend on the platform's
    # default encoding.
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(result)

    return result


def createindex(url, openaikey):
    """Download a video's audio, transcribe it and build the query index.

    The transcript is wrapped in a single ``Document``, indexed with
    ``GPTListIndex`` and saved to ``index.json`` in the working directory.

    Args:
        url: Link of the YouTube video to process.
        openaikey: OpenAI API key; stored in the ``OPENAI_API_KEY``
            environment variable.

    Returns:
        A status message on success or when the link is missing; otherwise
        the exception object describing what went wrong (shown in the UI).
    """
    # Basic Checks
    if not url:
        return "Please enter the Youtube video link."

    try:
        filename = download_yt_video(url)

        # download_yt_video reports failures by returning the exception;
        # pass the real cause on instead of feeding it to get_transcript.
        if isinstance(filename, Exception):
            return filename

        try:
            transcript = get_transcript(filename)
        finally:
            # Remove the downloaded audio even when transcription fails.
            if os.path.exists(filename):
                os.remove(filename)

        # Store openai key in environment
        os.environ['OPENAI_API_KEY'] = openaikey

        # Create index
        index = GPTListIndex([Document(transcript)], chunk_size_limit=2500)

        index_filename = "index.json"
        index.save_to_disk(index_filename)

        return "Video processed. Now you can start querying."
    except Exception as e:
        return e


def videoques(query, openaikey):
    """Answer a question using the index saved by ``createindex``.

    Args:
        query: The user's question.
        openaikey: OpenAI API key; stored in the ``OPENAI_API_KEY``
            environment variable before querying.

    Returns:
        The response object produced by the index query, a prompt string when
        an input or the index file is missing, or the exception object if
        loading or querying the index fails.
    """
    # Basic Checks
    if not query:
        return "Please enter your query."

    # Basic Checks
    if not openaikey:
        return "Please enter openaikey."

    index_name = "index.json"

    # The index only exists after a video has been processed; say so instead
    # of failing with FileNotFoundError.
    if not os.path.exists(index_name):
        return "Please process a video before querying."

    # Store openai key in environment
    os.environ['OPENAI_API_KEY'] = openaikey

    try:
        index = GPTListIndex.load_from_disk(index_name)

        # Query based on index
        response = index.query(query, mode="embedding", similarity_top_k=4)
    except Exception as e:
        # Same convention as createindex: hand the error back to the UI.
        return e

    return response


def cleartext(query, output):
    """Return two empty strings, one for each textbox being reset.

    Both arguments are ignored; they are accepted only because the click
    handler passes the current textbox values.
    """
    blank = ""
    return [blank, blank]


# Build the two-column UI: the left column processes a video into an index,
# the right column asks questions against that index.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    <h1><center><b>Portuguese VideoQues</b></center></h1>
    
    """)
    gr.Markdown(
        """
    Portuguese VideoQues answers your queries on any Portuguese video.
    
    """)
    with gr.Row():
        with gr.Column():
            url = gr.Textbox(lines=1, label="Enter Youtube Video link.")
            # Mask the key so it is not displayed in clear text on screen.
            openaikey = gr.Textbox(
                lines=1, label="Enter Your OpenAI key.", type="password")
            submit1_button = gr.Button("Submit")
            ans1_output = gr.Textbox(label="Status.")
            clear1_button = gr.Button("Clear")
        with gr.Column():
            query = gr.Textbox(lines=2, label="Enter Your Query.")
            submit2_button = gr.Button("Submit")
            ans2_output = gr.Textbox(label="Answer.")
            clear2_button = gr.Button("Clear")

    # Submit button for processing the video and building the index.
    submit1_button.click(createindex, inputs=[
                         url, openaikey], outputs=[ans1_output])

    # Submit button for submitting query.
    submit2_button.click(videoques, inputs=[
                         query, openaikey], outputs=[ans2_output])

    # Clear button for clearing the video link and status.
    clear1_button.click(cleartext, inputs=[
                        url, ans1_output], outputs=[url, ans1_output])

    # Clear button for clearing query and answer.
    clear2_button.click(cleartext, inputs=[query, ans2_output], outputs=[
                        query, ans2_output])

demo.launch(debug=True)