Bofandra committed on
Commit
549b455
·
verified ·
1 Parent(s): f286ff5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -3,6 +3,7 @@ from sentence_transformers import SentenceTransformer
3
  import pandas as pd
4
  import pickle
5
  from pathlib import Path
 
6
 
7
  def make_clickable_both(val):
8
  name, url = val.split('#')
@@ -11,6 +12,8 @@ def make_clickable_both(val):
11
  return f'<a href="{url}">{name}</a>'
12
 
13
  def find(query):
 
 
14
  def get_detailed_instruct(task_description: str, query: str) -> str:
15
  return f'Instruct: {task_description}\nQuery: {query}'
16
 
@@ -19,14 +22,17 @@ def find(query):
19
  queries = [
20
  get_detailed_instruct(task, query)
21
  ]
22
- print("cekpoin0\n")
23
 
24
  quran = pd.read_csv('quran-eng.csv', delimiter=",")
 
25
 
26
  file = open('quran-splitted.sav','rb')
27
  quran_splitted = pickle.load(file)
 
 
28
 
29
  model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
 
30
 
31
  documents = quran_splitted['text'].tolist()
32
  # document_embeddings = model.encode(documents, convert_to_tensor=True, normalize_embeddings=True)
@@ -34,18 +40,21 @@ def find(query):
34
  # pickle.dump(embeddings, open(filename, 'wb'))
35
  file = open('encoded_quran_text_split_multilingual-e5-large-instructs.sav','rb')
36
  document_embeddings = pickle.load(file)
37
- print("cekpoin1\n")
 
38
 
39
  query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
40
  scores = (query_embeddings @ document_embeddings.T) * 100
41
- print("cekpoin2\n")
 
42
 
43
  # insert the similarity value to dataframe & sort it
44
  file = open('quran-splitted.sav','rb')
45
  quran_splitted = pickle.load(file)
46
  quran_splitted['similarity'] = scores.tolist()[0]
47
  sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
48
- print("cekpoin3\n")
 
49
 
50
  #results = ""
51
  results = pd.DataFrame()
@@ -56,7 +65,9 @@ def find(query):
56
  results = pd.concat([results, result_quran])
57
  #results = results + result_quran['text'].item()+" (Q.S "+str(result['sura']).rstrip('.0')+":"+str(result['aya']).rstrip('.0')+")\n"
58
  i=i+1
59
-
 
 
60
  url = 'https://quran.com/'+results['sura'].astype(str)+':'+results['aya'].astype(str)+'/tafsirs/en-tafisr-ibn-kathir'
61
  results['text'] = '<a href="'+url+'">'+results['text']+ '</a>' + ' (QS. ' + results['sura'].astype(str) + ':' + results['aya'].astype(str) + ')'
62
  results = results.drop(columns=['sura', 'aya'])
 
3
  import pandas as pd
4
  import pickle
5
  from pathlib import Path
6
+ import time
7
 
8
  def make_clickable_both(val):
9
  name, url = val.split('#')
 
12
  return f'<a href="{url}">{name}</a>'
13
 
14
  def find(query):
15
+ print("start")
16
+ print(time.time())
17
  def get_detailed_instruct(task_description: str, query: str) -> str:
18
  return f'Instruct: {task_description}\nQuery: {query}'
19
 
 
22
  queries = [
23
  get_detailed_instruct(task, query)
24
  ]
 
25
 
26
  quran = pd.read_csv('quran-eng.csv', delimiter=",")
27
+ print(time.time())
28
 
29
  file = open('quran-splitted.sav','rb')
30
  quran_splitted = pickle.load(file)
31
+ print("load quran\n")
32
+ print(time.time())
33
 
34
  model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
35
+ print(time.time())
36
 
37
  documents = quran_splitted['text'].tolist()
38
  # document_embeddings = model.encode(documents, convert_to_tensor=True, normalize_embeddings=True)
 
40
  # pickle.dump(embeddings, open(filename, 'wb'))
41
  file = open('encoded_quran_text_split_multilingual-e5-large-instructs.sav','rb')
42
  document_embeddings = pickle.load(file)
43
+ print("load quran embedding\n")
44
+ print(time.time())
45
 
46
  query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
47
  scores = (query_embeddings @ document_embeddings.T) * 100
48
+ print("count similarities\n")
49
+ print(time.time())
50
 
51
  # insert the similarity value to dataframe & sort it
52
  file = open('quran-splitted.sav','rb')
53
  quran_splitted = pickle.load(file)
54
  quran_splitted['similarity'] = scores.tolist()[0]
55
  sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
56
+ print("sort by similarity\n")
57
+ print(time.time())
58
 
59
  #results = ""
60
  results = pd.DataFrame()
 
65
  results = pd.concat([results, result_quran])
66
  #results = results + result_quran['text'].item()+" (Q.S "+str(result['sura']).rstrip('.0')+":"+str(result['aya']).rstrip('.0')+")\n"
67
  i=i+1
68
+ print("collect results\n")
69
+ print(time.time())
70
+
71
  url = 'https://quran.com/'+results['sura'].astype(str)+':'+results['aya'].astype(str)+'/tafsirs/en-tafisr-ibn-kathir'
72
  results['text'] = '<a href="'+url+'">'+results['text']+ '</a>' + ' (QS. ' + results['sura'].astype(str) + ':' + results['aya'].astype(str) + ')'
73
  results = results.drop(columns=['sura', 'aya'])