Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
import streamlit as st
|
2 |
import time
|
3 |
import concurrent.futures
|
4 |
-
import json
|
|
|
|
|
5 |
|
6 |
# import tensorflow
|
7 |
from gensim.models import Word2Vec
|
@@ -18,11 +20,9 @@ import plotly.graph_objs as go
|
|
18 |
from streamlit.components.v1 import html
|
19 |
|
20 |
st.set_page_config(page_title="OncoDigger", page_icon=":microscope:", layout="wide", # centered
|
21 |
-
initial_sidebar_state="auto",
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
27 |
analytics_code = '''
|
28 |
<head>
|
@@ -40,7 +40,6 @@ analytics_code = '''
|
|
40 |
|
41 |
html(analytics_code, height=0)
|
42 |
|
43 |
-
|
44 |
# Define the HTML and CSS styles
|
45 |
st.markdown("""
|
46 |
<style>
|
@@ -78,8 +77,7 @@ def custom_subheader(text, identifier, font_size):
|
|
78 |
custom_subheader("To begin, simply select a cancer corpus from the left sidebar and enter a keyword "
|
79 |
"you wish to explore within the corpus. OncoDigger will determine the top words, "
|
80 |
"genes, drugs, phytochemicals, and compounds that are contextually and semantically related "
|
81 |
-
"to your input, both directly and indirectly. Dive in and enjoy the exploration!",
|
82 |
-
"unique-id", 18)
|
83 |
|
84 |
st.markdown("---")
|
85 |
|
@@ -98,10 +96,9 @@ st.markdown("---")
|
|
98 |
#
|
99 |
# # If the password is correct, show the app content
|
100 |
# if authenticate(password):
|
101 |
-
opt = st.sidebar.radio("Select a PubMed Corpus", options=(
|
102 |
-
|
103 |
-
|
104 |
-
'Kidney Cancer corpus'))
|
105 |
# if opt == "Clotting corpus":
|
106 |
# model_used = ("pubmed_model_clotting")
|
107 |
# num_abstracts = 45493
|
@@ -141,8 +138,7 @@ if opt == "Urinary Cancer corpus":
|
|
141 |
if opt == "Kidney Cancer corpus":
|
142 |
model_used = ("kidney_cancer_pubmed_model")
|
143 |
num_abstracts = 39016
|
144 |
-
database_name = "Kidney_cancer"
|
145 |
-
|
146 |
|
147 |
st.header(f":blue[{database_name} Pubmed corpus.]")
|
148 |
text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
|
@@ -173,11 +169,12 @@ if query:
|
|
173 |
model2 = model.wv[query]
|
174 |
# print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
|
175 |
df = pd.DataFrame(X)
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
-
if 'melanin' in model.wv.key_to_index:
|
178 |
-
print("The term 'melanin' is present in the model.")
|
179 |
-
else:
|
180 |
-
print("The term 'melanin' is not present in the model.")
|
181 |
|
182 |
def get_compound_ids(compound_names):
|
183 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
@@ -199,9 +196,9 @@ if query:
|
|
199 |
return None
|
200 |
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
st.markdown("---")
|
206 |
|
207 |
try:
|
@@ -213,10 +210,9 @@ if query:
|
|
213 |
pd.set_option('display.max_rows', None)
|
214 |
table2 = table.copy()
|
215 |
|
216 |
-
st.markdown(
|
217 |
-
|
218 |
-
|
219 |
-
f"PubMed corpus: Zoom in to the black diamond to find <span style='color:red; font-style: italic;'>{query}</span></h2>",
|
220 |
unsafe_allow_html=True)
|
221 |
|
222 |
# Set the max number of words to display
|
@@ -224,12 +220,24 @@ if query:
|
|
224 |
|
225 |
try:
|
226 |
value_word = min(50, len(table2))
|
227 |
-
# Get the top
|
228 |
-
top_words = model.wv.most_similar_cosmul(query, topn=
|
229 |
words = [word for word, sim in top_words]
|
230 |
words = [word.replace(' ', '-') for word in words]
|
231 |
sims = [sim for word, sim in top_words]
|
232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
|
234 |
# Compute similarities between query and top 100 words
|
235 |
sims_query_top = sims # print(sims_query_top)
|
@@ -237,7 +245,8 @@ if query:
|
|
237 |
print("Error:", e)
|
238 |
|
239 |
# Generate a 2D scatter plot of word embeddings using Plotly
|
240 |
-
fig = px.scatter(x=X_top[:, 0], y=X_top[:, 1], color=
|
|
|
241 |
|
242 |
# Set the plot background color to light cyan (#CCFFFF)
|
243 |
fig.update_layout(plot_bgcolor='#CCFFFF')
|
@@ -248,19 +257,19 @@ if query:
|
|
248 |
|
249 |
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
|
250 |
# fig.update_layout(title=dict(
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
fig.update_coloraxes(colorbar_title=f"Similarity with {query}")
|
255 |
|
256 |
# Represent query as a large red diamond
|
257 |
fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='markers',
|
258 |
-
marker=dict(size=
|
259 |
showlegend=False))
|
260 |
|
261 |
# Add label for the query above the diamond
|
262 |
fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='text', text=[query],
|
263 |
-
textposition='top right', textfont=dict(color='blue', size=
|
264 |
showlegend=False))
|
265 |
|
266 |
# Add circles for the top 50 similar words
|
@@ -274,14 +283,12 @@ if query:
|
|
274 |
|
275 |
st.plotly_chart(fig, use_container_width=True)
|
276 |
|
277 |
-
|
278 |
-
|
279 |
st.markdown(
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
|
286 |
short_table = table2.head(value_word).round(2)
|
287 |
short_table.index += 1
|
@@ -294,26 +301,28 @@ if query:
|
|
294 |
|
295 |
df = short_table
|
296 |
|
297 |
-
|
298 |
df['text'] = short_table.index
|
299 |
df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
|
300 |
-
|
|
|
301 |
df['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in short_table.index]
|
302 |
|
303 |
df.loc[:, 'database'] = database_name
|
304 |
|
305 |
-
fig = px.treemap(df, path=[short_table.index], values=sizes,
|
306 |
-
|
|
|
307 |
|
308 |
fig.update(layout_coloraxis_showscale=False)
|
309 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
|
310 |
fig.update_annotations(visible=False)
|
311 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
|
|
317 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
|
318 |
|
319 |
# st.pyplot(fig2)
|
@@ -325,7 +334,7 @@ if query:
|
|
325 |
|
326 |
csv = table2.head(value_word).to_csv().encode('utf-8')
|
327 |
st.download_button(label=f"download top {value_word} words (csv)", data=csv,
|
328 |
-
|
329 |
|
330 |
except:
|
331 |
st.warning(
|
@@ -334,8 +343,6 @@ if query:
|
|
334 |
st.warning(
|
335 |
"This word is not found in the corpus, it could be because it is not spelled correctly or could be that it does not have enough representation within the corpus, please try again")
|
336 |
|
337 |
-
|
338 |
-
|
339 |
# try:
|
340 |
# value_word = min(50, len(table2))
|
341 |
# # Get the top 50 similar words to the query
|
@@ -472,7 +479,8 @@ if query:
|
|
472 |
"Human gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
|
473 |
st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
|
474 |
st.caption("Gene information provided by GeneCards: https://www.genecards.org//")
|
475 |
-
st.caption(
|
|
|
476 |
|
477 |
csv = df1.head(value_gene).to_csv().encode('utf-8')
|
478 |
st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
|
@@ -502,49 +510,44 @@ if query:
|
|
502 |
except Exception as e:
|
503 |
print("Error:", e)
|
504 |
|
505 |
-
|
506 |
# Remove the text "Similarity Score" from each element in the sims list
|
507 |
sims_query_top = [float(sim.split()[-1]) for sim in sims]
|
508 |
# print(sims_query_top)
|
509 |
|
510 |
# Generate a 3D scatter plot of word embeddings using Plotly
|
511 |
fig2 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
|
512 |
-
|
513 |
-
hover_data={"color": sims_query_top})
|
514 |
|
515 |
# Set the 3D scene background color to light cyan (#CCFFFF)
|
516 |
fig2.update_layout(scene=dict(bgcolor='#CCFFFF'))
|
517 |
|
518 |
# Set the axis background to light cyan (#CCFFFF) and the axis color to blue
|
519 |
fig2.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
|
520 |
-
|
521 |
-
|
522 |
|
523 |
-
fig2.update_traces(
|
524 |
-
hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
|
525 |
fig2.update_layout(
|
526 |
-
|
527 |
-
|
528 |
-
scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
|
529 |
fig2.update_coloraxes(colorbar_title=f"Similarity with {query}")
|
530 |
|
531 |
# Represent query as a large red diamond
|
532 |
fig2.add_trace(
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
|
537 |
# Add label for the query above the diamond
|
538 |
-
fig2.add_trace(
|
539 |
-
|
540 |
-
|
541 |
-
hoverinfo='none', showlegend=False))
|
542 |
|
543 |
# Add circles for the top 50 similar words
|
544 |
fig2.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
|
549 |
fig2.update(layout_coloraxis_showscale=True)
|
550 |
fig2.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
|
@@ -872,10 +875,8 @@ if query:
|
|
872 |
# st.plotly_chart(fig4, use_container_width=True)
|
873 |
# st.markdown("---")
|
874 |
|
875 |
-
|
876 |
st.markdown("---")
|
877 |
|
878 |
-
|
879 |
# print()
|
880 |
# print("Human genes similar to " + str(query))
|
881 |
df1 = table.copy()
|
@@ -1159,7 +1160,6 @@ if query:
|
|
1159 |
6. [Cosine Similarity Calculator](https://www.omnicalculator.com/math/cosine-similarity) - A calculator for computing cosine similarity, a common metric used in measuring similarity between vectors.
|
1160 |
""")
|
1161 |
|
1162 |
-
|
1163 |
# else:
|
1164 |
# st.error("The password you entered is incorrect.")
|
1165 |
|
|
|
1 |
import streamlit as st
|
2 |
import time
|
3 |
import concurrent.futures
|
4 |
+
# import json
|
5 |
+
from sklearn.manifold import TSNE
|
6 |
+
# import umap
|
7 |
|
8 |
# import tensorflow
|
9 |
from gensim.models import Word2Vec
|
|
|
20 |
from streamlit.components.v1 import html
|
21 |
|
22 |
st.set_page_config(page_title="OncoDigger", page_icon=":microscope:", layout="wide", # centered
|
23 |
+
initial_sidebar_state="auto", menu_items={
|
24 |
+
'About': "OncoDigger is a Natural Language Processing (NLP) that harnesses Word2Vec to mine"
|
25 |
+
" insight from pubmed abstracts. Created by Jimmie E. Fata, PhD, [email protected]"})
|
|
|
|
|
26 |
|
27 |
analytics_code = '''
|
28 |
<head>
|
|
|
40 |
|
41 |
html(analytics_code, height=0)
|
42 |
|
|
|
43 |
# Define the HTML and CSS styles
|
44 |
st.markdown("""
|
45 |
<style>
|
|
|
77 |
custom_subheader("To begin, simply select a cancer corpus from the left sidebar and enter a keyword "
|
78 |
"you wish to explore within the corpus. OncoDigger will determine the top words, "
|
79 |
"genes, drugs, phytochemicals, and compounds that are contextually and semantically related "
|
80 |
+
"to your input, both directly and indirectly. Dive in and enjoy the exploration!", "unique-id", 18)
|
|
|
81 |
|
82 |
st.markdown("---")
|
83 |
|
|
|
96 |
#
|
97 |
# # If the password is correct, show the app content
|
98 |
# if authenticate(password):
|
99 |
+
opt = st.sidebar.radio("Select a PubMed Corpus", options=(
|
100 |
+
'Breast Cancer corpus', 'Lung Cancer corpus', 'Skin Cancer corpus', 'Colorectal Cancer corpus',
|
101 |
+
'Prostate Cancer corpus', 'Lymphoma Cancer corpus', 'Urinary Cancer corpus', 'Kidney Cancer corpus'))
|
|
|
102 |
# if opt == "Clotting corpus":
|
103 |
# model_used = ("pubmed_model_clotting")
|
104 |
# num_abstracts = 45493
|
|
|
138 |
if opt == "Kidney Cancer corpus":
|
139 |
model_used = ("kidney_cancer_pubmed_model")
|
140 |
num_abstracts = 39016
|
141 |
+
database_name = "Kidney_cancer"
|
|
|
142 |
|
143 |
st.header(f":blue[{database_name} Pubmed corpus.]")
|
144 |
text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
|
|
|
169 |
model2 = model.wv[query]
|
170 |
# print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
|
171 |
df = pd.DataFrame(X)
|
172 |
+
#
|
173 |
+
# if 'melanin' in model.wv.key_to_index:
|
174 |
+
# print("The term 'melanin' is present in the model.")
|
175 |
+
# else:
|
176 |
+
# print("The term 'melanin' is not present in the model.")
|
177 |
|
|
|
|
|
|
|
|
|
178 |
|
179 |
def get_compound_ids(compound_names):
|
180 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
|
|
196 |
return None
|
197 |
|
198 |
|
199 |
+
# except:
|
200 |
+
# st.error("Term occurrence is too low - please try another term")
|
201 |
+
# st.stop()
|
202 |
st.markdown("---")
|
203 |
|
204 |
try:
|
|
|
210 |
pd.set_option('display.max_rows', None)
|
211 |
table2 = table.copy()
|
212 |
|
213 |
+
st.markdown(f"<h2 style='text-align: center; font-family: Arial; font-size: 20px; font-weight: bold;'>"
|
214 |
+
f"Top <span style='color:red; font-style: italic;'>500</span> words in a dimension-reduced embedding map showing similarity to <span style='color:red; font-style: italic;'>{query}</span> in <span style='color:red; font-style: italic;'>{database_name}</span> "
|
215 |
+
f"corpus</span></h2>",
|
|
|
216 |
unsafe_allow_html=True)
|
217 |
|
218 |
# Set the max number of words to display
|
|
|
220 |
|
221 |
try:
|
222 |
value_word = min(50, len(table2))
|
223 |
+
# Get the top 500 similar words to the query
|
224 |
+
top_words = model.wv.most_similar_cosmul(query, topn=500)
|
225 |
words = [word for word, sim in top_words]
|
226 |
words = [word.replace(' ', '-') for word in words]
|
227 |
sims = [sim for word, sim in top_words]
|
228 |
+
X = model.wv[words]
|
229 |
+
|
230 |
+
# Add the query to the list of words and the embeddings array
|
231 |
+
words_with_query = [query] + words
|
232 |
+
X_with_query = np.vstack((model.wv[[query]], X))
|
233 |
+
|
234 |
+
# Perform t-SNE
|
235 |
+
tsne = TSNE(n_components=2, random_state=42)
|
236 |
+
X_tsne = tsne.fit_transform(X_with_query)
|
237 |
+
|
238 |
+
# Extract the t-SNE-transformed coordinates of the query and the top words
|
239 |
+
query_tsne = X_tsne[0]
|
240 |
+
X_top = X_tsne[1:]
|
241 |
|
242 |
# Reuse the similarity scores already returned with the top words
|
243 |
sims_query_top = sims # print(sims_query_top)
|
|
|
245 |
print("Error:", e)
|
246 |
|
247 |
# Generate a 2D scatter plot of word embeddings using Plotly
|
248 |
+
fig = px.scatter(x=X_top[:, 0], y=X_top[:, 1], color=sims, color_continuous_scale="RdYlGn")
|
249 |
+
|
250 |
|
251 |
# Set the plot background color to light cyan (#CCFFFF)
|
252 |
fig.update_layout(plot_bgcolor='#CCFFFF')
|
|
|
257 |
|
258 |
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
|
259 |
# fig.update_layout(title=dict(
|
260 |
+
# text=f"Top 10000 words in an interactive embedding map for {query} in {database_name} PubMed corpus"
|
261 |
+
# f": Zoom in to the black diamond to find {query}", x=0.5, y=1, xanchor='center', yanchor='top',
|
262 |
+
# font=dict(color='black')))
|
263 |
fig.update_coloraxes(colorbar_title=f"Similarity with {query}")
|
264 |
|
265 |
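# Represent query as a black diamond
# NOTE(review): the scatter points use t-SNE coordinates (X_top), but this marker and the label
# below use raw embedding components; query_tsne looks like the intended position - confirm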
|
266 |
fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='markers',
|
267 |
+
marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
|
268 |
showlegend=False))
|
269 |
|
270 |
# Add label for the query above the diamond
|
271 |
fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='text', text=[query],
|
272 |
+
textposition='top right', textfont=dict(color='blue', size=12), hoverinfo='none',
|
273 |
showlegend=False))
|
274 |
|
275 |
# Add circles for the top 50 similar words
|
|
|
283 |
|
284 |
st.plotly_chart(fig, use_container_width=True)
|
285 |
|
|
|
|
|
286 |
st.markdown(
|
287 |
+
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
|
288 |
+
f"</span>words contextually and semantically similar to "
|
289 |
+
f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
|
290 |
+
f"Click on the squares to expand and also the PubMed and Wikipedia links for more word information</span></p></b>",
|
291 |
+
unsafe_allow_html=True)
|
292 |
|
293 |
short_table = table2.head(value_word).round(2)
|
294 |
short_table.index += 1
|
|
|
301 |
|
302 |
df = short_table
|
303 |
|
|
|
304 |
df['text'] = short_table.index
|
305 |
df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
|
306 |
+
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in
|
307 |
+
short_table.index]
|
308 |
df['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in short_table.index]
|
309 |
|
310 |
df.loc[:, 'database'] = database_name
|
311 |
|
312 |
+
fig = px.treemap(df, path=[short_table.index], values=sizes,
|
313 |
+
custom_data=['href', 'text', 'database', 'href2'],
|
314 |
+
hover_name=(table2.head(value_word)['SIMILARITY']))
|
315 |
|
316 |
fig.update(layout_coloraxis_showscale=False)
|
317 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
|
318 |
fig.update_annotations(visible=False)
|
319 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
|
320 |
+
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
|
321 |
+
texttemplate="<br><span "
|
322 |
+
"style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
|
323 |
+
"<a href='%{customdata[0]}'>PubMed"
|
324 |
+
"</a><br><br><a href='%{customdata[3]}'>Wikipedia"
|
325 |
+
"</span></a>")
|
326 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
|
327 |
|
328 |
# st.pyplot(fig2)
|
|
|
334 |
|
335 |
csv = table2.head(value_word).to_csv().encode('utf-8')
|
336 |
st.download_button(label=f"download top {value_word} words (csv)", data=csv,
|
337 |
+
file_name=f'{database_name}_words.csv', mime='text/csv')
|
338 |
|
339 |
except:
|
340 |
st.warning(
|
|
|
343 |
st.warning(
|
344 |
"This word is not found in the corpus, it could be because it is not spelled correctly or could be that it does not have enough representation within the corpus, please try again")
|
345 |
|
|
|
|
|
346 |
# try:
|
347 |
# value_word = min(50, len(table2))
|
348 |
# # Get the top 50 similar words to the query
|
|
|
479 |
"Human gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
|
480 |
st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
|
481 |
st.caption("Gene information provided by GeneCards: https://www.genecards.org//")
|
482 |
+
st.caption(
|
483 |
+
"In some cases genes may represent abbreviations of words and not genes, use pubmed link to confirm output is a gene")
|
484 |
|
485 |
csv = df1.head(value_gene).to_csv().encode('utf-8')
|
486 |
st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
|
|
|
510 |
except Exception as e:
|
511 |
print("Error:", e)
|
512 |
|
|
|
513 |
# Remove the text "Similarity Score" from each element in the sims list
|
514 |
sims_query_top = [float(sim.split()[-1]) for sim in sims]
|
515 |
# print(sims_query_top)
|
516 |
|
517 |
# Generate a 3D scatter plot of word embeddings using Plotly
|
518 |
fig2 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
|
519 |
+
color_continuous_scale="RdYlGn", hover_name=words, hover_data={"color": sims_query_top})
|
|
|
520 |
|
521 |
# Set the 3D scene background color to light cyan (#CCFFFF)
|
522 |
fig2.update_layout(scene=dict(bgcolor='#CCFFFF'))
|
523 |
|
524 |
# Set the axis background to light cyan (#CCFFFF) and the axis color to blue
|
525 |
fig2.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
|
526 |
+
yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
|
527 |
+
zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
|
528 |
|
529 |
+
fig2.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
|
|
|
530 |
fig2.update_layout(
|
531 |
+
title=dict(text=f"", x=0.5, y=0.95, xanchor='center', yanchor='top', font=dict(color='black')),
|
532 |
+
scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
|
|
|
533 |
fig2.update_coloraxes(colorbar_title=f"Similarity with {query}")
|
534 |
|
535 |
# Represent query as a black diamond
|
536 |
fig2.add_trace(
|
537 |
+
go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
|
538 |
+
marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
|
539 |
+
showlegend=False))
|
540 |
|
541 |
# Add a text label for the query below the diamond
|
542 |
+
fig2.add_trace(go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text',
|
543 |
+
text=[query], textposition='bottom center', textfont=dict(color='blue', size=10),
|
544 |
+
hoverinfo='none', showlegend=False))
|
|
|
545 |
|
546 |
# Add circles for the top 50 similar words
|
547 |
fig2.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
|
548 |
+
marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
|
549 |
+
hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
|
550 |
+
text=words, customdata=sims, name=''))
|
551 |
|
552 |
fig2.update(layout_coloraxis_showscale=True)
|
553 |
fig2.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
|
|
|
875 |
# st.plotly_chart(fig4, use_container_width=True)
|
876 |
# st.markdown("---")
|
877 |
|
|
|
878 |
st.markdown("---")
|
879 |
|
|
|
880 |
# print()
|
881 |
# print("Human genes similar to " + str(query))
|
882 |
df1 = table.copy()
|
|
|
1160 |
6. [Cosine Similarity Calculator](https://www.omnicalculator.com/math/cosine-similarity) - A calculator for computing cosine similarity, a common metric used in measuring similarity between vectors.
|
1161 |
""")
|
1162 |
|
|
|
1163 |
# else:
|
1164 |
# st.error("The password you entered is incorrect.")
|
1165 |
|