Bofandra committed on
Commit
0368e08
·
verified ·
1 Parent(s): e8cda75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -6,19 +6,19 @@ from pathlib import Path
6
  import time
7
 
8
  print("load model start")
9
- print(time.time())
10
  model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
11
  print("load model end")
12
- print(time.time())
13
 
14
  quran = pd.read_csv('quran-eng.csv', delimiter=",")
15
  print("load quran eng")
16
- print(time.time())
17
 
18
  file = open('encoded_quran_text_split_multilingual-e5-large-instructs.sav','rb')
19
  document_embeddings = pickle.load(file)
20
  print("load quran embedding")
21
- print(time.time())
22
 
23
  def make_clickable_both(val):
24
  name, url = val.split('#')
@@ -28,7 +28,7 @@ def make_clickable_both(val):
28
 
29
  def find(query):
30
  print("start")
31
- print(time.time())
32
  def get_detailed_instruct(task_description: str, query: str) -> str:
33
  return f'Instruct: {task_description}\nQuery: {query}'
34
 
@@ -41,7 +41,7 @@ def find(query):
41
  #file = open('quran-splitted.sav','rb')
42
  #quran_splitted = pickle.load(file)
43
  #print("load quran\n")
44
- #print(time.time())
45
 
46
  #documents = quran_splitted['text'].tolist()
47
  # document_embeddings = model.encode(documents, convert_to_tensor=True, normalize_embeddings=True)
@@ -50,20 +50,20 @@ def find(query):
50
 
51
  query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
52
  print("embed query")
53
- print(time.time())
54
  scores = (query_embeddings @ document_embeddings.T) * 100
55
  print("count similarities")
56
- print(time.time())
57
 
58
  # insert the similarity value to dataframe & sort it
59
  file = open('quran-splitted.sav','rb')
60
  quran_splitted = pickle.load(file)
61
  print("load quran")
62
- print(time.time())
63
  quran_splitted['similarity'] = scores.tolist()[0]
64
  sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
65
  print("sort by similarity")
66
- print(time.time())
67
 
68
  #results = ""
69
  results = pd.DataFrame()
@@ -75,7 +75,7 @@ def find(query):
75
  #results = results + result_quran['text'].item()+" (Q.S "+str(result['sura']).rstrip('.0')+":"+str(result['aya']).rstrip('.0')+")\n"
76
  i=i+1
77
  print("collect results")
78
- print(time.time())
79
 
80
  url = 'https://quran.com/'+results['sura'].astype(str)+':'+results['aya'].astype(str)+'/tafsirs/en-tafisr-ibn-kathir'
81
  results['text'] = '<a href="'+url+'">'+results['text']+ '</a>' + ' (QS. ' + results['sura'].astype(str) + ':' + results['aya'].astype(str) + ')'
 
6
  import time
7
 
8
  print("load model start")
9
+ print(datetime.fromtimestamp(time.time()))
10
  model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
11
  print("load model end")
12
+ print(datetime.fromtimestamp(time.time()))
13
 
14
  quran = pd.read_csv('quran-eng.csv', delimiter=",")
15
  print("load quran eng")
16
+ print(datetime.fromtimestamp(time.time()))
17
 
18
  file = open('encoded_quran_text_split_multilingual-e5-large-instructs.sav','rb')
19
  document_embeddings = pickle.load(file)
20
  print("load quran embedding")
21
+ print(datetime.fromtimestamp(time.time()))
22
 
23
  def make_clickable_both(val):
24
  name, url = val.split('#')
 
28
 
29
  def find(query):
30
  print("start")
31
+ print(datetime.fromtimestamp(time.time()))
32
  def get_detailed_instruct(task_description: str, query: str) -> str:
33
  return f'Instruct: {task_description}\nQuery: {query}'
34
 
 
41
  #file = open('quran-splitted.sav','rb')
42
  #quran_splitted = pickle.load(file)
43
  #print("load quran\n")
44
+ #print(datetime.fromtimestamp(time.time()))
45
 
46
  #documents = quran_splitted['text'].tolist()
47
  # document_embeddings = model.encode(documents, convert_to_tensor=True, normalize_embeddings=True)
 
50
 
51
  query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
52
  print("embed query")
53
+ print(datetime.fromtimestamp(time.time()))
54
  scores = (query_embeddings @ document_embeddings.T) * 100
55
  print("count similarities")
56
+ print(datetime.fromtimestamp(time.time()))
57
 
58
  # insert the similarity value to dataframe & sort it
59
  file = open('quran-splitted.sav','rb')
60
  quran_splitted = pickle.load(file)
61
  print("load quran")
62
+ print(datetime.fromtimestamp(time.time()))
63
  quran_splitted['similarity'] = scores.tolist()[0]
64
  sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
65
  print("sort by similarity")
66
+ print(datetime.fromtimestamp(time.time()))
67
 
68
  #results = ""
69
  results = pd.DataFrame()
 
75
  #results = results + result_quran['text'].item()+" (Q.S "+str(result['sura']).rstrip('.0')+":"+str(result['aya']).rstrip('.0')+")\n"
76
  i=i+1
77
  print("collect results")
78
+ print(datetime.fromtimestamp(time.time()))
79
 
80
  url = 'https://quran.com/'+results['sura'].astype(str)+':'+results['aya'].astype(str)+'/tafsirs/en-tafisr-ibn-kathir'
81
  results['text'] = '<a href="'+url+'">'+results['text']+ '</a>' + ' (QS. ' + results['sura'].astype(str) + ':' + results['aya'].astype(str) + ')'