import pickle

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors


# Precomputed embedding vectors. semantic_search() maps neighbour indices
# straight onto row positions of ``df``, so the vectors are assumed to be
# stored in the same order as the rows of quran_hadith.csv.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load an embeddings file that comes from a trusted source.
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

# Cosine-distance neighbour index over the stored vectors. The neighbour
# count is capped at the number of stored vectors because kneighbors()
# raises ValueError when asked for more neighbours than fitted samples.
nbrs = NearestNeighbors(
    n_neighbors=min(10, len(embeddings)),
    metric='cosine',
).fit(embeddings)

# Source texts; semantic_search() reads the 'text' column by position.
df = pd.read_csv('quran_hadith.csv')

# Encoder used for incoming queries.
# NOTE(review): presumably the same model that produced embeddings.pkl —
# confirm, otherwise query and stored vectors are not comparable.
model = SentenceTransformer('all-MiniLM-L6-v2')


def semantic_search(query, model, embeddings, nbrs, texts=None):
    """Return the stored texts closest to *query* together with their distances.

    Args:
        query: Query string to embed.
        model: Encoder exposing ``encode(list_of_str)``; the first row of its
            result is used as the query vector.
        embeddings: Not read by the lookup itself (``nbrs`` is queried
            instead); kept in the signature so existing callers keep working.
        nbrs: Fitted neighbour index exposing ``kneighbors``.
        texts: Optional pandas Series of candidate texts, read by position
            with ``.iloc``. Defaults to the ``text`` column of the
            module-level ``df``.

    Returns:
        A list of ``(text, distance)`` tuples, in the order in which
        ``nbrs.kneighbors`` returns the neighbours.
    """
    if texts is None:
        # Fall back to the module-level frame; looked up once, not per hit.
        texts = df['text']

    query_embedding = model.encode([query])[0]
    # kneighbors takes a batch of vectors; we send one and read row 0 back.
    distances, indices = nbrs.kneighbors([query_embedding])

    return [
        (texts.iloc[idx], dist)
        for idx, dist in zip(indices[0], distances[0])
    ]


def search_interface(query):
    """Gradio callback: return the stored texts most similar to *query*.

    Args:
        query: Text typed into the query box.

    Returns:
        The matching texts joined by blank lines, or an empty string when
        the query is empty or contains only whitespace.
    """
    # A blank query has nothing to search on; skip the encoder and the index
    # instead of returning neighbours of an empty string.
    if not query or not query.strip():
        return ''

    similar_sentences = semantic_search(query, model, embeddings, nbrs)
    # Distances are dropped here; only the text is shown to the user.
    return '\n\n'.join(sentence for sentence, _ in similar_sentences)


# Lift the pandas column-width limit so long text values are not truncated
# when a frame is displayed.
pd.set_option('display.max_colwidth', None)

# Single-textbox UI: the query goes to search_interface and the joined
# result texts are shown in the output box.
iface = gr.Interface(
    fn=search_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs=gr.Textbox(label="Similar Sentences"),
)

# share=True asks Gradio for a publicly reachable link in addition to the
# local server, so anyone with the link can query the app.
iface.launch(share=True)
|