jfataphd committed on
Commit
1edc895
·
1 Parent(s): 78a2dc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -78
app.py CHANGED
@@ -1,7 +1,9 @@
1
  import streamlit as st
2
  import time
3
  import concurrent.futures
4
- import json
 
 
5
 
6
  # import tensorflow
7
  from gensim.models import Word2Vec
@@ -18,11 +20,9 @@ import plotly.graph_objs as go
18
  from streamlit.components.v1 import html
19
 
20
  st.set_page_config(page_title="OncoDigger", page_icon=":microscope:", layout="wide", # centered
21
- initial_sidebar_state="auto",
22
- menu_items={'About': "OncoDigger is a Natural Language Processing (NLP) that harnesses Word2Vec to mine"
23
- " insight from pubmed abstracts. Created by Jimmie E. Fata, PhD, [email protected]"})
24
-
25
-
26
 
27
  analytics_code = '''
28
  <head>
@@ -40,7 +40,6 @@ analytics_code = '''
40
 
41
  html(analytics_code, height=0)
42
 
43
-
44
  # Define the HTML and CSS styles
45
  st.markdown("""
46
  <style>
@@ -78,8 +77,7 @@ def custom_subheader(text, identifier, font_size):
78
  custom_subheader("To begin, simply select a cancer corpus from the left sidebar and enter a keyword "
79
  "you wish to explore within the corpus. OncoDigger will determine the top words, "
80
  "genes, drugs, phytochemicals, and compounds that are contextually and semantically related "
81
- "to your input, both directly and indirectly. Dive in and enjoy the exploration!",
82
- "unique-id", 18)
83
 
84
  st.markdown("---")
85
 
@@ -98,10 +96,9 @@ st.markdown("---")
98
  #
99
  # # If the password is correct, show the app content
100
  # if authenticate(password):
101
- opt = st.sidebar.radio("Select a PubMed Corpus", options=('Breast Cancer corpus', 'Lung Cancer corpus',
102
- 'Skin Cancer corpus', 'Colorectal Cancer corpus',
103
- 'Prostate Cancer corpus', 'Lymphoma Cancer corpus', 'Urinary Cancer corpus',
104
- 'Kidney Cancer corpus'))
105
  # if opt == "Clotting corpus":
106
  # model_used = ("pubmed_model_clotting")
107
  # num_abstracts = 45493
@@ -141,8 +138,7 @@ if opt == "Urinary Cancer corpus":
141
  if opt == "Kidney Cancer corpus":
142
  model_used = ("kidney_cancer_pubmed_model")
143
  num_abstracts = 39016
144
- database_name = "Kidney_cancer"
145
-
146
 
147
  st.header(f":blue[{database_name} Pubmed corpus.]")
148
  text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
@@ -173,11 +169,12 @@ if query:
173
  model2 = model.wv[query]
174
  # print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
175
  df = pd.DataFrame(X)
 
 
 
 
 
176
 
177
- if 'melanin' in model.wv.key_to_index:
178
- print("The term 'melanin' is present in the model.")
179
- else:
180
- print("The term 'melanin' is not present in the model.")
181
 
182
  def get_compound_ids(compound_names):
183
  with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -199,9 +196,9 @@ if query:
199
  return None
200
 
201
 
202
- # except:
203
- # st.error("Term occurrence is too low - please try another term")
204
- # st.stop()
205
  st.markdown("---")
206
 
207
  try:
@@ -213,10 +210,9 @@ if query:
213
  pd.set_option('display.max_rows', None)
214
  table2 = table.copy()
215
 
216
- st.markdown(
217
- f"<h2 style='text-align: center; font-family: Arial; font-size: 20px; font-weight: bold;'>"
218
- f"Top <span style='color:red; font-style: italic;'>10000</span> words in an interactive embedding map most similar to <span style='color:red; font-style: italic;'>{query}</span> in <span style='color:red; font-style: italic;'>{database_name}</span> "
219
- f"PubMed corpus: Zoom in to the black diamond to find <span style='color:red; font-style: italic;'>{query}</span></h2>",
220
  unsafe_allow_html=True)
221
 
222
  # Set the max number of words to display
@@ -224,12 +220,24 @@ if query:
224
 
225
  try:
226
  value_word = min(50, len(table2))
227
- # Get the top 50 similar words to the query
228
- top_words = model.wv.most_similar_cosmul(query, topn=10000)
229
  words = [word for word, sim in top_words]
230
  words = [word.replace(' ', '-') for word in words]
231
  sims = [sim for word, sim in top_words]
232
- X_top = model.wv[words]
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  # Compute similarities between query and top 100 words
235
  sims_query_top = sims # print(sims_query_top)
@@ -237,7 +245,8 @@ if query:
237
  print("Error:", e)
238
 
239
  # Generate a 2D scatter plot of word embeddings using Plotly
240
- fig = px.scatter(x=X_top[:, 0], y=X_top[:, 1], color=sims_query_top, color_continuous_scale="RdYlGn", )
 
241
 
242
  # Change background color to black
243
  fig.update_layout(plot_bgcolor='#CCFFFF')
@@ -248,19 +257,19 @@ if query:
248
 
249
  # fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
250
  # fig.update_layout(title=dict(
251
- # text=f"Top 10000 words in an interactive embedding map for {query} in {database_name} PubMed corpus"
252
- # f": Zoom in to the black diamond to find {query}", x=0.5, y=1, xanchor='center', yanchor='top',
253
- # font=dict(color='black')))
254
  fig.update_coloraxes(colorbar_title=f"Similarity with {query}")
255
 
256
  # Represent query as a large red diamond
257
  fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='markers',
258
- marker=dict(size=12, color='black', symbol='diamond'), name=query, hovertext=query,
259
  showlegend=False))
260
 
261
  # Add label for the query above the diamond
262
  fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='text', text=[query],
263
- textposition='top right', textfont=dict(color='blue', size=10), hoverinfo='none',
264
  showlegend=False))
265
 
266
  # Add circles for the top 50 similar words
@@ -274,14 +283,12 @@ if query:
274
 
275
  st.plotly_chart(fig, use_container_width=True)
276
 
277
-
278
-
279
  st.markdown(
280
- f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
281
- f"</span>words contextually and semantically similar to "
282
- f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
283
- f"Click on the squares to expand and also the PubMed and Wikipedia links for more word information</span></p></b>",
284
- unsafe_allow_html=True)
285
 
286
  short_table = table2.head(value_word).round(2)
287
  short_table.index += 1
@@ -294,26 +301,28 @@ if query:
294
 
295
  df = short_table
296
 
297
-
298
  df['text'] = short_table.index
299
  df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
300
- '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
 
301
  df['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in short_table.index]
302
 
303
  df.loc[:, 'database'] = database_name
304
 
305
- fig = px.treemap(df, path=[short_table.index], values=sizes, custom_data=['href', 'text', 'database', 'href2'],
306
- hover_name=(table2.head(value_word)['SIMILARITY']))
 
307
 
308
  fig.update(layout_coloraxis_showscale=False)
309
  fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
310
  fig.update_annotations(visible=False)
311
  fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
312
- hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000", texttemplate="<br><span "
313
- "style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
314
- "<a href='%{customdata[0]}'>PubMed"
315
- "</a><br><br><a href='%{customdata[3]}'>Wikipedia"
316
- "</span></a>")
 
317
  fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
318
 
319
  # st.pyplot(fig2)
@@ -325,7 +334,7 @@ if query:
325
 
326
  csv = table2.head(value_word).to_csv().encode('utf-8')
327
  st.download_button(label=f"download top {value_word} words (csv)", data=csv,
328
- file_name=f'{database_name}_words.csv', mime='text/csv')
329
 
330
  except:
331
  st.warning(
@@ -334,8 +343,6 @@ if query:
334
  st.warning(
335
  "This word is not found in the corpus, it could be because it is not spelled correctly or could be that it does not have enough representation within the corpus, please try again")
336
 
337
-
338
-
339
  # try:
340
  # value_word = min(50, len(table2))
341
  # # Get the top 50 similar words to the query
@@ -472,7 +479,8 @@ if query:
472
  "Human gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
473
  st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
474
  st.caption("Gene information provided by GeneCards: https://www.genecards.org//")
475
- st.caption("In some cases genes may represent abbreviations of words and not genes, use pubmed link to confirm output is a gene")
 
476
 
477
  csv = df1.head(value_gene).to_csv().encode('utf-8')
478
  st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
@@ -502,49 +510,44 @@ if query:
502
  except Exception as e:
503
  print("Error:", e)
504
 
505
-
506
  # Remove the text "Similarity Score" from each element in the sims list
507
  sims_query_top = [float(sim.split()[-1]) for sim in sims]
508
  # print(sims_query_top)
509
 
510
  # Generate a 3D scatter plot of word embeddings using Plotly
511
  fig2 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
512
- color_continuous_scale="RdYlGn", hover_name=words,
513
- hover_data={"color": sims_query_top})
514
 
515
  # Change background color to black
516
  fig2.update_layout(scene=dict(bgcolor='#CCFFFF'))
517
 
518
  # Change color of text to white
519
  fig2.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
520
- yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
521
- zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
522
 
523
- fig2.update_traces(
524
- hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
525
  fig2.update_layout(
526
- title=dict(text=f"", x=0.5, y=0.95,
527
- xanchor='center', yanchor='top', font=dict(color='black')),
528
- scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
529
  fig2.update_coloraxes(colorbar_title=f"Similarity with {query}")
530
 
531
  # Represent query as a large red diamond
532
  fig2.add_trace(
533
- go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
534
- marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
535
- showlegend=False))
536
 
537
  # Add label for the query above the diamond
538
- fig2.add_trace(
539
- go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text',
540
- text=[query], textposition='bottom center', textfont=dict(color='blue', size=10),
541
- hoverinfo='none', showlegend=False))
542
 
543
  # Add circles for the top 50 similar words
544
  fig2.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
545
- marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
546
- hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
547
- text=words, customdata=sims, name=''))
548
 
549
  fig2.update(layout_coloraxis_showscale=True)
550
  fig2.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
@@ -872,10 +875,8 @@ if query:
872
  # st.plotly_chart(fig4, use_container_width=True)
873
  # st.markdown("---")
874
 
875
-
876
  st.markdown("---")
877
 
878
-
879
  # print()
880
  # print("Human genes similar to " + str(query))
881
  df1 = table.copy()
@@ -1159,7 +1160,6 @@ if query:
1159
  6. [Cosine Similarity Calculator](https://www.omnicalculator.com/math/cosine-similarity) - A calculator for computing cosine similarity, a common metric used in measuring similarity between vectors.
1160
  """)
1161
 
1162
-
1163
  # else:
1164
  # st.error("The password you entered is incorrect.")
1165
 
 
1
  import streamlit as st
2
  import time
3
  import concurrent.futures
4
+ # import json
5
+ from sklearn.manifold import TSNE
6
+ # import umap
7
 
8
  # import tensorflow
9
  from gensim.models import Word2Vec
 
20
  from streamlit.components.v1 import html
21
 
22
  st.set_page_config(page_title="OncoDigger", page_icon=":microscope:", layout="wide", # centered
23
+ initial_sidebar_state="auto", menu_items={
24
+ 'About': "OncoDigger is a Natural Language Processing (NLP) that harnesses Word2Vec to mine"
25
+ " insight from pubmed abstracts. Created by Jimmie E. Fata, PhD, [email protected]"})
 
 
26
 
27
  analytics_code = '''
28
  <head>
 
40
 
41
  html(analytics_code, height=0)
42
 
 
43
  # Define the HTML and CSS styles
44
  st.markdown("""
45
  <style>
 
77
  custom_subheader("To begin, simply select a cancer corpus from the left sidebar and enter a keyword "
78
  "you wish to explore within the corpus. OncoDigger will determine the top words, "
79
  "genes, drugs, phytochemicals, and compounds that are contextually and semantically related "
80
+ "to your input, both directly and indirectly. Dive in and enjoy the exploration!", "unique-id", 18)
 
81
 
82
  st.markdown("---")
83
 
 
96
  #
97
  # # If the password is correct, show the app content
98
  # if authenticate(password):
99
+ opt = st.sidebar.radio("Select a PubMed Corpus", options=(
100
+ 'Breast Cancer corpus', 'Lung Cancer corpus', 'Skin Cancer corpus', 'Colorectal Cancer corpus',
101
+ 'Prostate Cancer corpus', 'Lymphoma Cancer corpus', 'Urinary Cancer corpus', 'Kidney Cancer corpus'))
 
102
  # if opt == "Clotting corpus":
103
  # model_used = ("pubmed_model_clotting")
104
  # num_abstracts = 45493
 
138
  if opt == "Kidney Cancer corpus":
139
  model_used = ("kidney_cancer_pubmed_model")
140
  num_abstracts = 39016
141
+ database_name = "Kidney_cancer"
 
142
 
143
  st.header(f":blue[{database_name} Pubmed corpus.]")
144
  text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
 
169
  model2 = model.wv[query]
170
  # print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
171
  df = pd.DataFrame(X)
172
+ #
173
+ # if 'melanin' in model.wv.key_to_index:
174
+ # print("The term 'melanin' is present in the model.")
175
+ # else:
176
+ # print("The term 'melanin' is not present in the model.")
177
 
 
 
 
 
178
 
179
  def get_compound_ids(compound_names):
180
  with concurrent.futures.ThreadPoolExecutor() as executor:
 
196
  return None
197
 
198
 
199
+ # except:
200
+ # st.error("Term occurrence is too low - please try another term")
201
+ # st.stop()
202
  st.markdown("---")
203
 
204
  try:
 
210
  pd.set_option('display.max_rows', None)
211
  table2 = table.copy()
212
 
213
+ st.markdown(f"<h2 style='text-align: center; font-family: Arial; font-size: 20px; font-weight: bold;'>"
214
+ f"Top <span style='color:red; font-style: italic;'>500</span> words in a dimension-reduced embedding map showing similarity to <span style='color:red; font-style: italic;'>{query}</span> in <span style='color:red; font-style: italic;'>{database_name}</span> "
215
+ f"corpus</span></h2>",
 
216
  unsafe_allow_html=True)
217
 
218
  # Set the max number of words to display
 
220
 
221
  try:
222
  value_word = min(50, len(table2))
223
+ # Get the top 500 similar words to the query
224
+ top_words = model.wv.most_similar_cosmul(query, topn=500)
225
  words = [word for word, sim in top_words]
226
  words = [word.replace(' ', '-') for word in words]
227
  sims = [sim for word, sim in top_words]
228
+ X = model.wv[words]
229
+
230
+ # Add the query to the list of words and the embeddings array
231
+ words_with_query = [query] + words
232
+ X_with_query = np.vstack((model.wv[[query]], X))
233
+
234
+ # Perform t-SNE
235
+ tsne = TSNE(n_components=2, random_state=42)
236
+ X_tsne = tsne.fit_transform(X_with_query)
237
+
238
+ # Extract the t-SNE-transformed coordinates of the query and the top words
239
+ query_tsne = X_tsne[0]
240
+ X_top = X_tsne[1:]
241
 
242
  # Reuse the similarity scores returned with the top 500 words
243
  sims_query_top = sims # print(sims_query_top)
 
245
  print("Error:", e)
246
 
247
  # Generate a 2D scatter plot of word embeddings using Plotly
248
+ fig = px.scatter(x=X_top[:, 0], y=X_top[:, 1], color=sims, color_continuous_scale="RdYlGn")
249
+
250
 
251
  # Set the plot background color to #CCFFFF (light cyan)
252
  fig.update_layout(plot_bgcolor='#CCFFFF')
 
257
 
258
  # fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
259
  # fig.update_layout(title=dict(
260
+ # text=f"Top 10000 words in an interactive embedding map for {query} in {database_name} PubMed corpus"
261
+ # f": Zoom in to the black diamond to find {query}", x=0.5, y=1, xanchor='center', yanchor='top',
262
+ # font=dict(color='black')))
263
  fig.update_coloraxes(colorbar_title=f"Similarity with {query}")
264
 
265
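  # Represent query as a black diamond marker. NOTE(review): it is placed at raw embedding dims 0/1 while the points use t-SNE coordinates; query_tsne looks like the intended position - confirm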
266
  fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='markers',
267
+ marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
268
  showlegend=False))
269
 
270
  # Add label for the query above the diamond
271
  fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='text', text=[query],
272
+ textposition='top right', textfont=dict(color='blue', size=12), hoverinfo='none',
273
  showlegend=False))
274
 
275
  # Add circles for the top 50 similar words
 
283
 
284
  st.plotly_chart(fig, use_container_width=True)
285
 
 
 
286
  st.markdown(
287
+ f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
288
+ f"</span>words contextually and semantically similar to "
289
+ f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
290
+ f"Click on the squares to expand and also the PubMed and Wikipedia links for more word information</span></p></b>",
291
+ unsafe_allow_html=True)
292
 
293
  short_table = table2.head(value_word).round(2)
294
  short_table.index += 1
 
301
 
302
  df = short_table
303
 
 
304
  df['text'] = short_table.index
305
  df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
306
+ '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in
307
+ short_table.index]
308
  df['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in short_table.index]
309
 
310
  df.loc[:, 'database'] = database_name
311
 
312
+ fig = px.treemap(df, path=[short_table.index], values=sizes,
313
+ custom_data=['href', 'text', 'database', 'href2'],
314
+ hover_name=(table2.head(value_word)['SIMILARITY']))
315
 
316
  fig.update(layout_coloraxis_showscale=False)
317
  fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
318
  fig.update_annotations(visible=False)
319
  fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
320
+ hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
321
+ texttemplate="<br><span "
322
+ "style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
323
+ "<a href='%{customdata[0]}'>PubMed"
324
+ "</a><br><br><a href='%{customdata[3]}'>Wikipedia"
325
+ "</span></a>")
326
  fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
327
 
328
  # st.pyplot(fig2)
 
334
 
335
  csv = table2.head(value_word).to_csv().encode('utf-8')
336
  st.download_button(label=f"download top {value_word} words (csv)", data=csv,
337
+ file_name=f'{database_name}_words.csv', mime='text/csv')
338
 
339
  except:
340
  st.warning(
 
343
  st.warning(
344
  "This word is not found in the corpus, it could be because it is not spelled correctly or could be that it does not have enough representation within the corpus, please try again")
345
 
 
 
346
  # try:
347
  # value_word = min(50, len(table2))
348
  # # Get the top 50 similar words to the query
 
479
  "Human gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
480
  st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
481
  st.caption("Gene information provided by GeneCards: https://www.genecards.org//")
482
+ st.caption(
483
+ "In some cases genes may represent abbreviations of words and not genes, use pubmed link to confirm output is a gene")
484
 
485
  csv = df1.head(value_gene).to_csv().encode('utf-8')
486
  st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
 
510
  except Exception as e:
511
  print("Error:", e)
512
 
 
513
  # Remove the text "Similarity Score" from each element in the sims list
514
  sims_query_top = [float(sim.split()[-1]) for sim in sims]
515
  # print(sims_query_top)
516
 
517
  # Generate a 3D scatter plot of word embeddings using Plotly
518
  fig2 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
519
+ color_continuous_scale="RdYlGn", hover_name=words, hover_data={"color": sims_query_top})
 
520
 
521
  # Set the 3D scene background color to #CCFFFF (light cyan)
522
  fig2.update_layout(scene=dict(bgcolor='#CCFFFF'))
523
 
524
  # Set each axis background to #CCFFFF and the axis color to blue
525
  fig2.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
526
+ yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
527
+ zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
528
 
529
+ fig2.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
 
530
  fig2.update_layout(
531
+ title=dict(text=f"", x=0.5, y=0.95, xanchor='center', yanchor='top', font=dict(color='black')),
532
+ scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
 
533
  fig2.update_coloraxes(colorbar_title=f"Similarity with {query}")
534
 
535
  # Represent query as a black diamond marker
536
  fig2.add_trace(
537
+ go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
538
+ marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
539
+ showlegend=False))
540
 
541
  # Add label for the query above the diamond
542
+ fig2.add_trace(go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text',
543
+ text=[query], textposition='bottom center', textfont=dict(color='blue', size=10),
544
+ hoverinfo='none', showlegend=False))
 
545
 
546
  # Add circles for the top 50 similar words
547
  fig2.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
548
+ marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
549
+ hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
550
+ text=words, customdata=sims, name=''))
551
 
552
  fig2.update(layout_coloraxis_showscale=True)
553
  fig2.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
 
875
  # st.plotly_chart(fig4, use_container_width=True)
876
  # st.markdown("---")
877
 
 
878
  st.markdown("---")
879
 
 
880
  # print()
881
  # print("Human genes similar to " + str(query))
882
  df1 = table.copy()
 
1160
  6. [Cosine Similarity Calculator](https://www.omnicalculator.com/math/cosine-similarity) - A calculator for computing cosine similarity, a common metric used in measuring similarity between vectors.
1161
  """)
1162
 
 
1163
  # else:
1164
  # st.error("The password you entered is incorrect.")
1165