File size: 2,539 Bytes
8cca354
 
 
b9b440a
8cca354
 
 
b9b440a
 
 
 
 
0326b2c
b9b440a
 
 
 
 
 
 
 
8cca354
 
 
 
 
b9b440a
63cb040
 
 
 
 
 
 
 
 
 
b9b440a
8cca354
 
63cb040
b9b440a
 
63cb040
 
 
 
 
b9b440a
63cb040
b9b440a
 
 
 
8cca354
b9b440a
8cca354
b9b440a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from sentence_transformers import SentenceTransformer
import pickle
import numpy as np

import torch
import gradio as gr

import os
# HACK: install Whisper from GitHub at startup (Hugging Face Spaces pattern);
# must run before `import whisper` below. NOTE(review): consider pinning a
# commit/tag in requirements.txt instead of installing at import time.
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper


# Speech-to-text model used to transcribe microphone input ("base" = small,
# CPU-friendly checkpoint). Loaded once at module import.
infer_model = whisper.load_model("base")


def infer(audio):
    """Transcribe the audio file at *audio* and return the spoken text."""
    transcription = infer_model.transcribe(audio)
    return transcription["text"]


# Sentence-embedding model used to embed free-text queries at request time.
model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Precomputed course-content embeddings shipped with the app:
# {(department_name, course_title): embedding vector}.
# NOTE(review): pickle.loads of untrusted data is unsafe — acceptable here
# only because this file is bundled with the app, not user-supplied.
with open("dep_course_title_to_content_embed.pickle", "rb") as handle:
    loaded_map = pickle.load(handle)

dep_name_course_name = list(loaded_map.keys())
# sorted() gives a deterministic department order in the UI checkbox group
# (a bare set() would reorder arbitrarily between runs).
deps = sorted({dep for dep, _ in dep_name_course_name})

# Per-department parallel lists: titles and their embedding vectors share
# the same index within each department.
dep_to_course_name = {dep: [] for dep in deps}
dep_to_course_embedding = {dep: [] for dep in deps}

for (dep_name, course_name), embedding in loaded_map.items():
    dep_to_course_name[dep_name].append(course_name)
    # float32 so the stacked matrix matches torch.from_numpy expectations.
    dep_to_course_embedding[dep_name].append(np.array(embedding, dtype=np.float32))

# Cosine similarity along the feature dimension of a stacked (N, D) matrix.
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)


def give_best_match(query, audio, Department):
    """Rank courses by cosine similarity between the query and course content.

    Parameters
    ----------
    query : str
        Free-text course description (overridden when *audio* is provided).
    audio : str or None
        Filepath of a recorded clip; transcribed via Whisper when truthy.
    Department : list[str]
        Departments to restrict the search to; empty/None searches all.

    Returns
    -------
    tuple[str, dict[str, float]]
        The effective query text and {course_title: similarity}, most
        similar first.
    """
    # Empty selection means "search every department"; use a local name
    # instead of rebinding the parameter.
    selected_deps = Department or deps

    course_titles = []
    course_embeddings = []
    for dep in selected_deps:
        course_titles.extend(dep_to_course_name[dep])
        course_embeddings.extend(dep_to_course_embedding[dep])
    course_embeddings = np.stack(course_embeddings)  # (N, D)

    # Spoken input takes precedence over typed text.
    if audio:
        query = infer(audio)

    embed = model.encode(query)
    # (N, D) vs (D,) broadcasts to per-course similarity, shape (N,).
    result = cos(torch.from_numpy(course_embeddings), torch.from_numpy(embed))
    # Sort descending in torch directly — replaces the reversed(np.argsort())
    # numpy round-trip; .tolist() yields plain ints safe for list indexing.
    order = torch.argsort(result, descending=True).tolist()
    predictions = {course_titles[i]: float(result[i]) for i in order}
    return query, predictions


# Gradio UI: text box + optional microphone input + optional department
# filter; outputs the effective query and the top matching courses.
demo = gr.Interface(
    fn=give_best_match,
    inputs=[
        gr.Textbox(
            label="Describe the course",
            lines=5,
            # Fixed typo: "Refernce" -> "Reference".
            placeholder="Type anything related to course/s\n\nTitle, Topics/Sub Topics, Reference books, Questions asked in exams or some random fun stuff.",
        ),
        gr.Audio(
            source="microphone",
            type="filepath",  # give_best_match/infer expect a file path
            label="Don't want to type, Try Describing using your sweet voice!!",
            interactive=True,
        ),
        gr.CheckboxGroup(deps, label="(Optional) Departments"),
    ],
    outputs=[
        gr.Textbox(
            label="Query",
            lines=2,
        ),
        gr.Label(label="Most Relevant Courses", num_top_classes=5),
    ],
)

demo.launch()