mhdhrubo committed on
Commit
7008b1f
·
verified ·
1 Parent(s): e35f719

updating functions

Browse files
Files changed (1) hide show
  1. app.py +47 -43
app.py CHANGED
@@ -1,43 +1,47 @@
1
- import pickle
2
- import pandas as pd
3
- from sentence_transformers import SentenceTransformer
4
- from sklearn.neighbors import NearestNeighbors
5
- import gradio as gr
6
-
7
- # Load the embeddings from the file
8
- with open('embeddings.pkl', 'rb') as f:
9
- embeddings = pickle.load(f)
10
-
11
- # Initialize the Nearest Neighbors model with cosine similarity
12
- nbrs = NearestNeighbors(n_neighbors=10, metric='cosine').fit(embeddings)
13
-
14
- # Load the dataset
15
- df = pd.read_csv('quran_hadith.csv')
16
-
17
- # Initialize the SentenceTransformer model
18
- model = SentenceTransformer('all-MiniLM-L6-v2')
19
-
20
- def semantic_search(query, model, embeddings, nbrs):
21
- query_embedding = model.encode([query])[0]
22
- distances, indices = nbrs.kneighbors([query_embedding])
23
- similar_sentences = [(df['text'].iloc[idx], dist) for idx, dist in zip(indices[:10], distances)]
24
- return similar_sentences
25
-
26
- # Gradio function
27
- def search_interface(query):
28
- similar_sentences = semantic_search(query, model, embeddings, nbrs)
29
- sentences = [sentence for sentence, distance in similar_sentences]
30
- return sentences
31
-
32
- pd.set_option('display.max_colwidth', None)
33
-
34
-
35
- # Create Gradio interface
36
- iface = gr.Interface(
37
- fn=search_interface,
38
- inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
39
- outputs=gr.Textbox(label="Similar Sentences")
40
- )
41
-
42
- # Launch the interface
43
- iface.launch(share=True)
 
 
 
 
 
1
+ import pickle
2
+ import pandas as pd
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.neighbors import NearestNeighbors
5
+ import gradio as gr
6
+
7
# Read the precomputed sentence embeddings from disk.
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

# Build a 10-neighbour index over the embeddings using cosine distance.
nbrs = NearestNeighbors(n_neighbors=10, metric='cosine')
nbrs.fit(embeddings)

# Read the dataset whose 'text' column holds the searchable sentences.
df = pd.read_csv('quran_hadith.csv')

# Load the sentence encoder used to embed incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')
19
+
20
def semantic_search(query, model, embeddings, nbrs):
    """Return the dataset texts closest to *query*, with their distances.

    Args:
        query: Free-text search string.
        model: Encoder exposing ``encode(list_of_str)``; row 0 of its result
            is used as the query vector.
        embeddings: Unused by this function; kept so existing callers that
            pass it keep working.
        nbrs: Fitted neighbour index exposing ``kneighbors``.

    Returns:
        A list of ``(text, distance)`` tuples in the order reported by
        ``nbrs.kneighbors``, where ``text`` is read from the module-level
        ``df['text']`` column. An empty or whitespace-only query yields an
        empty list.
    """
    # A blank query carries nothing to search for; return early instead of
    # encoding it and reporting the arbitrary neighbours of an empty string.
    if not query or not query.strip():
        return []

    # Encode the query; the encoder works on batches, so take the single row.
    query_embedding = model.encode([query])[0]

    # kneighbors takes a batch of vectors and returns one row per query, so
    # wrap the single vector and read row 0 of both results below.
    distances, indices = nbrs.kneighbors([query_embedding])

    return [
        (df['text'].iloc[idx], dist)
        for idx, dist in zip(indices[0], distances[0])
    ]
30
+
31
def search_interface(query):
    """Return only the matched texts for *query*.

    Delegates to ``semantic_search`` with the module-level ``model``,
    ``embeddings`` and ``nbrs``, then drops the distance from each
    ``(text, distance)`` pair.
    """
    matches = semantic_search(query, model, embeddings, nbrs)
    return [text for text, _ in matches]
35
+
36
# Do not truncate long column values when pandas renders them as text.
pd.set_option('display.max_colwidth', None)

# Input and output widgets for the search UI.
query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
results_box = gr.Textbox(label="Similar Sentences")

# Wire the search callback to the widgets.
iface = gr.Interface(
    fn=search_interface,
    inputs=query_box,
    outputs=results_box,
)

# Start the app; share=True asks Gradio for a public share link.
iface.launch(share=True)