File size: 2,235 Bytes
2a1f5d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e315a8
 
 
 
 
 
 
 
 
 
 
 
 
 
2a1f5d0
 
 
8e315a8
2a1f5d0
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import functools
import io
import pickle

import gradio as gr
import torch
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings

class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that routes pickled torch storage bytes through a CPU load.

    Every global lookup is delegated to ``pickle.Unpickler`` except
    ``torch.storage._load_from_bytes``, which is swapped for a loader that
    calls ``torch.load`` with ``map_location='cpu'``.
    """

    def find_class(self, module, name):
        """Resolve the global ``module.name`` referenced by the pickle stream.

        Returns the CPU-mapping loader for ``torch.storage._load_from_bytes``
        and the regular ``pickle.Unpickler`` resolution for anything else.
        """
        wants_storage_loader = (module, name) == ('torch.storage', '_load_from_bytes')
        if not wants_storage_loader:
            return super().find_class(module, name)

        def load_on_cpu(raw_bytes):
            # Re-read the serialized storage from memory, forcing it onto CPU.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return load_on_cpu


@functools.lru_cache(maxsize=1)
def get_hugging_face_model():
    """Return the embeddings wrapper for ``mchochlov/codebert-base-cd-ft``.

    The result is memoized: the ``HuggingFaceEmbeddings`` object is built on
    the first successful call and the same instance is returned afterwards,
    so callers that ask for the model on every request do not rebuild it.
    A call that raises is not cached and will be retried next time.

    Returns:
        The ``HuggingFaceEmbeddings`` instance for the model above.
    """
    model_name = "mchochlov/codebert-base-cd-ft"
    return HuggingFaceEmbeddings(model_name=model_name)


@functools.lru_cache(maxsize=1)
def get_db():
    """Load the pickled search database from ``codesearchdb.pickle``.

    The file is opened relative to the current working directory and
    deserialized with ``CPU_Unpickler``. The result is memoized, so the file
    is read and unpickled once and the same object is returned on later
    calls. A call that raises is not cached.

    Returns:
        The object stored in ``codesearchdb.pickle``.

    Raises:
        FileNotFoundError: If ``codesearchdb.pickle`` does not exist.
    """
    # SECURITY: unpickling can execute arbitrary code. This file must be a
    # trusted artifact shipped with the app, never user-supplied data.
    with open("codesearchdb.pickle", "rb") as f:
        return CPU_Unpickler(f).load()


def get_similar_links(query, db, embeddings, k=10):
    """Collect the hyperlinks found in the documents closest to *query*.

    Args:
        query: Text to embed and search for.
        db: Object exposing ``similarity_search_by_vector(vector, k=...)``
            that returns an iterable of documents, each with a
            ``page_content`` attribute holding HTML.
        embeddings: Object exposing ``embed_query(text)``.
        k: Number of documents to request from ``db``. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        A flat list with the ``href`` of every ``<a href=...>`` tag, in the
        order the documents were returned and then in document order within
        each one. Duplicates are kept.
    """
    query_vector = embeddings.embed_query(query)
    matches = db.similarity_search_by_vector(query_vector, k=k)
    links = []
    for doc in matches:
        soup = BeautifulSoup(doc.page_content, 'html.parser')
        # href=True skips anchors that have no href attribute at all.
        links.extend(a['href'] for a in soup.find_all('a', href=True))
    return links


def find_similar_questions(text_input):
    """Return links related to *text_input*, one per line.

    Fetches the embeddings model and the search database, gathers the links
    from the matching documents and joins the distinct ones with newlines.

    Args:
        text_input: The code snippet entered by the user.

    Returns:
        A newline-separated string of unique links, in the order they were
        first found. Empty if no links were found.
    """
    embeddings = get_hugging_face_model()
    db = get_db()
    links = get_similar_links(text_input, db, embeddings)
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would shuffle the links and give a different order from run to run.
    return "\n".join(dict.fromkeys(links))

# Default snippet shown in the input box when the page loads.
s_example = """
class Solution(object):
    def isValid(self, s):
        stack = []
        mapping = {")": "(", "}": "{", "]": "["}
        for char in s:
            if char in mapping:
                top_element = stack.pop() if stack else '#'
                if mapping[char] != top_element:
                    return False
            else:
                stack.append(char)
        return not stack
"""

# Web UI: one multi-line code input wired to find_similar_questions, one text
# output. Components come from the top-level gradio namespace because
# gr.inputs / gr.outputs are the deprecated legacy namespaces (removed in
# Gradio 4), and the initial text is passed as `value=`, which gr.Textbox
# accepts.
iface = gr.Interface(
    fn=find_similar_questions,
    inputs=gr.Textbox(lines=20, label="Enter a Code Example", value=s_example),
    outputs=gr.Textbox(label="Similar Questions on Leetcode"),
    # The emoji were mis-encoded in the title; restored here.
    title="📒 DSASearch Engine 🤖",
    description="Find similar questions on Leetcode based on a code example.",
    # String form of the former boolean False: flagging stays disabled.
    allow_flagging="never",
)

iface.launch()