import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
import pickle
from pathlib import Path  
import time

# --- Startup: load the embedding model and precomputed data once at import time. ---

print("load model start")
print(time.time())
# Multilingual, instruction-tuned embedding model; queries in find() are
# prefixed with an instruction string as this model family expects.
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
print("load model end")
print(time.time())

# Full Quran text; find() reads its 'sura', 'aya' and 'text' columns.
quran = pd.read_csv('quran-eng.csv', delimiter=",")
print("load quran eng")
print(time.time())

# Precomputed verse-fragment embeddings. Use a context manager so the file
# handle is closed (the original `open(...)` leaked it for the process lifetime).
with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as f:
    document_embeddings = pickle.load(f)
print("load quran embedding")
print(time.time())

def make_clickable_both(val): 
    """Render a ``name#url`` string as an HTML anchor tag.

    Args:
        val: String of the form ``"<name>#<url>"``. Only the FIRST ``'#'``
            is treated as the separator, so URLs containing a fragment
            (``...#section``) still parse.

    Returns:
        str: ``<a href="url">name</a>``.

    Raises:
        ValueError: If *val* contains no ``'#'`` at all.
    """
    # maxsplit=1 fixes the original bug where a '#' inside the URL made the
    # two-target unpacking raise ValueError; debug prints removed.
    name, url = val.split('#', 1)
    return f'<a href="{url}">{name}</a>'

def find(query, top_k=3):
    """Semantic search over the Quran: return the *top_k* best-matching verses.

    Args:
        query: Free-text search query (any language the model supports).
        top_k: Number of top-scoring verses to return. Defaults to 3,
            preserving the original hard-coded behavior.

    Returns:
        pandas.DataFrame: A frame whose 'text' column holds each verse as an
        HTML link to its Ibn Kathir tafsir page on quran.com, suffixed with
        its "(QS. sura:aya)" reference. The 'sura'/'aya' columns are dropped.
    """
    print("start")
    print(time.time())

    def get_detailed_instruct(task_description: str, query: str) -> str:
        # E5-instruct models expect the query prefixed with a one-sentence
        # task instruction in exactly this format.
        return f'Instruct: {task_description}\nQuery: {query}'

    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [
        get_detailed_instruct(task, query)
    ]

    # Embed the query and score it against every precomputed verse embedding
    # (cosine similarity scaled to 0-100, since both sides are normalized).
    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    print("embed query")
    print(time.time())
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("count similarities")
    print(time.time())

    # Load the verse-fragment frame (rows align with document_embeddings),
    # attach the similarity scores, and sort best-first. A context manager
    # closes the pickle file (the original leaked one handle per query).
    with open('quran-splitted.sav', 'rb') as f:
        quran_splitted = pickle.load(f)
    print("load quran")
    print(time.time())
    quran_splitted['similarity'] = scores.tolist()[0]
    sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
    print("sort by similarity")
    print(time.time())

    # Collect the full verse row for each of the top_k fragments.
    results = pd.DataFrame()
    for i in range(top_k):
        result = sorted_quran.iloc[i]
        result_quran = quran.loc[(quran['sura'] == result['sura']) & (quran['aya'] == result['aya'])]
        results = pd.concat([results, result_quran])
    print("collect results")
    print(time.time())

    # Render each verse as an HTML link to its tafsir page, then drop the
    # raw reference columns so the UI shows a single 'text' column.
    # NOTE: 'en-tafisr-ibn-kathir' is quran.com's actual URL slug — keep as-is.
    url = ('https://quran.com/' + results['sura'].astype(str) + ':'
           + results['aya'].astype(str) + '/tafsirs/en-tafisr-ibn-kathir')
    results['text'] = ('<a href="' + url + '">' + results['text'] + '</a>'
                       + ' (QS. ' + results['sura'].astype(str) + ':'
                       + results['aya'].astype(str) + ')')
    results = results.drop(columns=['sura', 'aya'])
    return results
    
# Gradio UI wiring: a single textbox feeds find(); results are rendered as a
# markdown-capable dataframe so the generated HTML anchor tags display as links.
example_queries = [
    ["law of inheritance in islam"],
    ["tunjukilah jalan yang lurus"],
    ["سليمان"],
]

demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[gr.Dataframe(headers=['text'], datatype=["markdown"], wrap=True)],
    cache_examples="lazy",
    examples=example_queries,
    title="Quran Finder",
)

if __name__ == "__main__":
    demo.launch()