Spaces:

jfataphd
/

OncoDigger

Running

App Files Files Community

jfataphd committed on Apr 23, 2023

Commit

575134c

1 Parent(s): 7a0576f

Update app.py

Browse files

Files changed (1) hide show

app.py +335 -287

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import streamlit as st
 import time
 import concurrent.futures
 import json
 from gensim.models import Word2Vec
 import pandas as pd
 import threading
@@ -12,7 +14,7 @@ import re
 import urllib.request
 import random
 import plotly.express as px
 from streamlit.components.v1 import html
 st.set_page_config(page_title="OncoDigger", page_icon=":microscope:", layout="wide",  # centered
@@ -162,7 +164,6 @@ if query:
         # print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
         df = pd.DataFrame(X)
         def get_compound_ids(compound_names):
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 compound_ids = list(executor.map(get_compound_id, compound_names))
@@ -197,8 +198,6 @@ if query:
             pd.set_option('display.max_rows', None)
             table2 = table.copy()
             # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
             #             f"<span style='color:red; font-style: italic;'>words</span> contextually "
             #             f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
@@ -208,6 +207,58 @@ if query:
             # Set the max number of words to display
             value_word = min(100, len(table2))
             st.markdown(
             f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
             f"</span>words contextually and semantically similar to "
@@ -265,106 +316,62 @@ if query:
     except KeyError:
         st.warning(
             "This word is not found in the corpus, it could be because it is not spelled correctly or could be that it does not have enough representation within the corpus, please try again")
-    # st.markdown("---")
-    # # st.write(short_table)
-    # #
-    #
-    # # print()
-    # # print("Human genes similar to " + str(query))
-    # df1 = table.copy()
-    # df2 = pd.read_csv('Human Genes.csv')
-    # m = df1.Word.isin(df2.symbol)
-    # df1 = df1[m]
-    # df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
-    # df1["Human Gene"] = df1["Human Gene"].str.upper()
-    # # print(df1.head(50))
-    # # print()
-    # # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
-    # # time.sleep(2)
-    # # Create the slider with increments of 5 up to 100
-    #
-    # # Set the maximum number of genes to display up to 100
-    # value_gene = min(len(df1), 100)
-    #
-    # if value_gene > 0:
-    #     # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Treemap visualization of "
-    #     #             f"<span style='color:red; font-style: italic;'>genes</span> contextually "
-    #     #             f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
-    #     #             f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
-    #     #     unsafe_allow_html=True)
-    #
-    #     st.markdown(
-    #         f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
-    #         f"</span>genes contextually and semantically similar to "
-    #         f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. "
-    #         f"Click on the squares to expand and also the Pubmed and GeneCard links for more gene information</span></p></b>",
-    #         unsafe_allow_html=True)
     #
-    #     df10 = df1.head(value_gene).copy()
-    #     df10.index = (1 / df10.index) * 100000
-    #     sizes = df10.index.tolist()
-    #     df10.set_index('Human Gene', inplace=True)
     #
-    #     df3 = df1.copy()
-    #     df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value_gene)["SIMILARITY"].round(2).astype(str)
-    #     df3.reset_index(inplace=True)
-    #     df3 = df3.rename(columns={'Human Gene': 'symbol2'})
-    #     # Use df.query to get a subset of df1 based on ids in df2
-    #     subset = df3.head(value_gene).query('symbol2 in @df2.symbol2')
-    #     # Use merge to join the two DataFrames on id
-    #     result = pd.merge(subset, df2, on='symbol2')
-    #     # Show the result
-    #     # print(result)
-    #     # label = df10.index.tolist()
-    #     # df2 = df10
-    #     # print(df2)
-    #     try:
-    #         # Define the `text` column for labels and `href` column for links
-    #         df10['text'] = df10.index
-    #         df10['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
-    #                         '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10['text']]
-    #         df10['href2'] = [f'https://www.genecards.org/cgi-bin/carddisp.pl?gene=' + c for c in df10['text']]
     #
-    #         df10['name'] = [c for c in result['Approved name']]
-    #         assert isinstance(df10, object)
-    #         df10.loc[:, 'database'] = database_name
     #
-    #         # print(df['name'])
     #
-    #         # Create the treemap using `px.treemap`
-    #         fig = px.treemap(df10, path=[df10['text']], values=sizes,
-    #                          custom_data=['href', 'name', 'database', 'href2', 'text'],
-    #                          hover_name=(df3.head(value_gene)['SIMILARITY']))
     #
-    #         fig.update(layout_coloraxis_showscale=False)
-    #         fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
-    #         fig.update_annotations(visible=False)
-    #         fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
-    #                           hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
-    #                           texttemplate="<br><span style='font-family: Arial; font-size: 20px;'>%{customdata[4]}<br><br>"
-    #                                        "%{customdata[1]}<br><br>"
-    #                                        "<a href='%{customdata[0]}'>PubMed"
-    #                                        "</a><br><br><a href='%{customdata[3]}'>GeneCard"
-    #                                        "</span></a>")
-    #         fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
-    #         # # display the treemap in Streamlit
-    #         # with treemap2:
     #
-    #         # st.pyplot(fig2)
-    #         st.plotly_chart(fig, use_container_width=True)
     #
-    #         st.caption(
-    #             "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
-    #         st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
-    #         st.caption("Gene information provided by GeneCards: https://www.genecards.org//")
     #
-    #         csv = df1.head(value_gene).to_csv().encode('utf-8')
-    #         st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
-    #                            file_name=f'{database_name}_genes.csv', mime='text/csv')
     #
     #
-    #     except:
-    #         st.warning(f"No similar genes related to {query} within the {database_name} corpus were found.")
     st.markdown("---")
@@ -375,7 +382,7 @@ if query:
         df1 = df1[m]
         df1.rename(columns={'Word': 'Genes'}, inplace=True)
         df_len = len(df1)
-        print(len(df1))
         # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
         #             f"<span style='color:red; font-style: italic;'>proteins</span> contextually "
@@ -457,6 +464,70 @@ if query:
         else:
             st.warning(
                 f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
         st.markdown("---")
         # print()
         # print("Human genes similar to " + str(query))
@@ -476,7 +547,7 @@ if query:
         # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
         value_drug = min(df1.shape[0], 100)
         # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
         #             f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
         #             f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
@@ -556,208 +627,65 @@ if query:
         else:
             st.warning(
                 f"This selection exceeds the number of similar drugs related to {query} within the {database_name} corpus, please choose a lower number")
-        st.markdown("---")
-        #
-        # st.markdown("---")
-        # # print()
-        # # print("Human genes similar to " + str(query))
-        # df1 = table.copy()
-        # df2 = pd.read_csv('diseasesKegg.csv')
-        # m = df1.Word.isin(df2.disease)
-        # df1 = df1[m]
-        # df1.rename(columns={'Word': 'Disease'}, inplace=True)
-        # df_len = len(df1)
-        # # print(len(df1))
-        # # df1["Human Gene"] = df1["Human Gene"].str.upper()
-        # # print(df1.head(50))
-        # # print()
-        # # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
-        # # time.sleep(2)
-        # # Create the slider with increments of 5 up to 100
-        #
-        # # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
-        # value_disease = min(df1.shape[0], 100)
-        #
-        # # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
-        # #             f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
-        # #             f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
-        # #             f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
-        # #     unsafe_allow_html=True)
-        #
-        # st.markdown(
-        #     f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_disease} "
-        #     f"</span>Diseases contextually and semantically similar to "
-        #     f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
-        #     unsafe_allow_html=True)
-        #
-        # df14 = df1.head(value_disease).copy()
-        #
-        # df14.index = (1 / df14.index) * 10000
-        # sizes = df14.index.tolist()
-        #
-        # df14.set_index('Disease', inplace=True)
-        #
-        # df7 = df1.copy()
-        # # print(df4.head(10))
-        # df7["SIMILARITY"] = 'Similarity Score ' + df7.head(value_disease)["SIMILARITY"].round(2).astype(str)
-        # df7.reset_index(inplace=True)
-        # # df4 = df4.rename(columns={'Protein': 'symbol2'})
-        # # print(df4)
-        # # # Use df.query to get a subset of df1 based on ids in df2
-        # # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
-        # # # Use merge to join the two DataFrames on id
-        # # result = pd.merge(subset, df2b, on='symbol2')
-        # # print(result)
-        # if value_disease <= df_len:
-        #     # Define the `text` column for labels and `href` column for links
-        #     # Reset the index
-        #     df14.reset_index(inplace=True)
-        #
-        #     # Replace hyphens with spaces in the 'text' column
-        #     df14['Disease'] = df14['Disease'].str.replace('-', ' ')
-        #
-        #     # Set the 'text' column back as the index
-        #     df14.set_index('Disease', inplace=True)
-        #     df14['text'] = df14.index
-        #     df14['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
-        #                     '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df14['text']]
-        #     df14['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in df14['text']]
-        #     assert isinstance(df14, object)
-        #     df14['database'] = database_name
-        #
-        #     # df11['name'] = [c for c in result['Approved name']]
-        #
-        #     # Create the treemap using `px.treemap`
-        #     fig = px.treemap(df14, path=[df14['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
-        #                      hover_name=(df7.head(value_disease)['SIMILARITY']))
-        #
-        #     fig.update(layout_coloraxis_showscale=False)
-        #     fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
-        #     fig.update_annotations(visible=False)
-        #     fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
-        #                       hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
-        #                       texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
-        #                                    "<a href='%{customdata[0]}'>PubMed"
-        #                                    "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
-        #                                    "</span></a>")
-        #     fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["PaleGoldenRod"])
-        #     # # display the treemap in Streamlit
-        #     # with treemap2:
-        #
-        #     # st.pyplot(fig2)
-        #     st.plotly_chart(fig, use_container_width=True)
-        #
-        #     st.caption("Disease designation and database provided by KEGG: https://www.genome.jp/kegg/disease/")
-        #
-        #     csv = df1.head(value_disease).to_csv().encode('utf-8')
-        #     st.download_button(label=f"download top {value_disease} diseases (csv)", data=csv,
-        #                        file_name=f'{database_name}_disease.csv', mime='text/csv')
-        #
         #
-        # else:
-        #     st.warning(
-        #         f"This selection exceeds the number of similar diseases related to {query} within the {database_name} corpus, please choose a lower number")
-        # st.markdown("---")
-        # st.markdown("---")
-        # # print()
-        # # print("Human genes similar to " + str(query))
-        # df1 = table.copy()
-        # df2 = pd.read_csv('pathwaysKegg.csv')
-        # m = df1.Word.isin(df2.pathway)
-        # df1 = df1[m]
-        # df1.rename(columns={'Word': 'Pathway'}, inplace=True)
-        # df_len = len(df1)
-        # # print(len(df1))
-        # # df1["Human Gene"] = df1["Human Gene"].str.upper()
-        # # print(df1.head(50))
-        # # print()
-        # # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
-        # # time.sleep(2)
-        # # Create the slider with increments of 5 up to 100
-        #
-        # # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
-        # value_pathway = min(df1.shape[0], 100)
-        #
-        # # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
-        # #             f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
-        # #             f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
-        # #             f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
-        # #     unsafe_allow_html=True)
-        #
-        # st.markdown(
-        #     f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_pathway} "
-        #     f"</span>Pathways contextually and semantically similar to "
-        #     f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
-        #     unsafe_allow_html=True)
-        #
-        # df16 = df1.head(value_pathway).copy()
-        #
-        # df16.index = (1 / df16.index) * 10000
-        # sizes = df16.index.tolist()
-        #
-        # df16.set_index('Pathway', inplace=True)
         #
-        # df9 = df1.copy()
-        # # print(df4.head(10))
-        # df9["SIMILARITY"] = 'Similarity Score ' + df9.head(value_pathway)["SIMILARITY"].round(2).astype(str)
-        # df9.reset_index(inplace=True)
-        # # df4 = df4.rename(columns={'Protein': 'symbol2'})
-        # # print(df4)
-        # # # Use df.query to get a subset of df1 based on ids in df2
-        # # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
-        # # # Use merge to join the two DataFrames on id
-        # # result = pd.merge(subset, df2b, on='symbol2')
-        # # print(result)
-        # if value_pathway <= df_len:
-        #     # Define the `text` column for labels and `href` column for links
-        #     # Reset the index
-        #     df16.reset_index(inplace=True)
         #
-        #     # Replace hyphens with spaces in the 'text' column
-        #     df16['Pathway'] = df16['Pathway'].str.replace('-', ' ')
         #
-        #     # Set the 'text' column back as the index
-        #     df16.set_index('Pathway', inplace=True)
-        #     df16['text'] = df16.index
-        #     df16['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
-        #                     '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df16['text']]
-        #     df16['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in df16['text']]
-        #     assert isinstance(df16, object)
-        #     df16['database'] = database_name
         #
-        #     # df11['name'] = [c for c in result['Approved name']]
         #
-        #     # Create the treemap using `px.treemap`
-        #     fig = px.treemap(df16, path=[df16['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
-        #                      hover_name=(df9.head(value_pathway)['SIMILARITY']))
         #
-        #     fig.update(layout_coloraxis_showscale=False)
-        #     fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
-        #     fig.update_annotations(visible=False)
-        #     fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
-        #                       hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
-        #                       texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
-        #                                    "<a href='%{customdata[0]}'>PubMed"
-        #                                    "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
-        #                                    "</span></a>")
-        #     fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["FloralWhite"])
-        #     # # display the treemap in Streamlit
-        #     # with treemap2:
         #
-        #     # st.pyplot(fig2)
-        #     st.plotly_chart(fig, use_container_width=True)
         #
-        #     st.caption("Pathway designation and database provided by KEGG: https://www.genome.jp/kegg/pathway.html")
         #
-        #     csv = df1.head(value_pathway).to_csv().encode('utf-8')
-        #     st.download_button(label=f"download top {value_pathway} pathways (csv)", data=csv,
-        #                        file_name=f'{database_name}_pathways.csv', mime='text/csv')
         #
-        #
-        # else:
-        #     st.warning(
-        #         f"This selection exceeds the number of similar pathways related to {query} within the {database_name} corpus, please choose a lower number")
         # st.markdown("---")
         st.markdown("---")
@@ -860,8 +788,70 @@ if query:
         else:
             st.warning(
                 f"This selection exceeds the number of similar pythochemicals related to {query} within the {database_name} corpus, please choose a lower number")
         st.markdown("---")
         # print()
         # print("Human genes similar to " + str(query))
         df1 = table.copy()
@@ -966,8 +956,65 @@ if query:
         else:
             st.warning(
                 f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
-        st.markdown("---")
         # import os
         # from datasets import Dataset
@@ -1088,6 +1135,7 @@ if query:
         5. [Word2Vec: How to Implement Word2Vec in Python](https://www.youtube.com/watch?v=ISPId9Lhc1g&t=6s) - A YouTube video by Data Talks demonstrating how to implement Word2Vec in Python using the Gensim library.
         """)
 # else:
 #     st.error("The password you entered is incorrect.")

 import time
 import concurrent.futures
 import json
+# import tensorflow
 from gensim.models import Word2Vec
 import pandas as pd
 import threading
 import urllib.request
 import random
 import plotly.express as px
+import plotly.graph_objs as go
 from streamlit.components.v1 import html
 st.set_page_config(page_title="OncoDigger", page_icon=":microscope:", layout="wide",  # centered
         # print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
         df = pd.DataFrame(X)
         def get_compound_ids(compound_names):
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 compound_ids = list(executor.map(get_compound_id, compound_names))
             pd.set_option('display.max_rows', None)
             table2 = table.copy()
             # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
             #             f"<span style='color:red; font-style: italic;'>words</span> contextually "
             #             f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
             # Set the max number of words to display
             value_word = min(100, len(table2))
+            try:
+                value_word = min(50, len(table2))
+                # Get the 10000 words most similar to the query
+                top_words = model.wv.most_similar_cosmul(query, topn=10000)
+                words = [word for word, sim in top_words]
+                words = [word.replace(' ', '-') for word in words]
+                sims = [sim for word, sim in top_words]
+                X_top = model.wv[words]
+                # Reuse the similarity scores returned above as the color values
+                sims_query_top = sims  # print(sims_query_top)
+            except Exception as e:
+                print("Error:", e)
+            # Generate a 2D scatter plot of word embeddings using Plotly
+            fig = px.scatter(x=X_top[:, 0], y=X_top[:, 1], color=sims_query_top, color_continuous_scale="RdYlGn", )
+            # Set the plot background to light cyan (#CCFFFF)
+            fig.update_layout(plot_bgcolor='#CCFFFF')
+            # Color the axes blue and blend the grid lines into the background
+            fig.update_layout(xaxis=dict(gridcolor='#CCFFFF', color='blue'),
+                              yaxis=dict(gridcolor='#CCFFFF', color='blue'))
+            # fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
+            fig.update_layout(title=dict(
+                text=f"Top 10000 words in an interactive embedding map for {query} in {database_name} PubMed corpus"
+                     f": Zoom in to the black diamond to find {query}", x=0.5, y=1, xanchor='center', yanchor='top',
+                font=dict(color='black')))
+            fig.update_coloraxes(colorbar_title="Similarity with query")
+            # Represent query as a large black diamond
+            fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='markers',
+                                     marker=dict(size=12, color='black', symbol='diamond'), name=query, hovertext=query,
+                                     showlegend=False))
+            # Add label for the query above the diamond
+            fig.add_trace(go.Scatter(x=[model.wv[query][0]], y=[model.wv[query][1]], mode='text', text=[query],
+                                     textposition='top right', textfont=dict(color='blue', size=10), hoverinfo='none',
+                                     showlegend=False))
+            # Add small circles for all of the retrieved similar words
+            fig.add_trace(go.Scatter(x=X_top[:, 0], y=X_top[:, 1], mode='markers',
+                                     marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
+                                     text=words, customdata=sims, name=''))
+            fig.update(layout_coloraxis_showscale=True)
+            fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
+            fig.update_annotations(visible=False)
+            st.plotly_chart(fig, use_container_width=True)
             st.markdown(
             f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
             f"</span>words contextually and semantically similar to "
     except KeyError:
         st.warning(
             "This word is not found in the corpus, it could be because it is not spelled correctly or could be that it does not have enough representation within the corpus, please try again")
+    # try:
+    #     value_word = min(50, len(table2))
+    #     # Get the top 50 similar words to the query
+    #     top_words = model.wv.most_similar_cosmul(query, topn=value_word)
+    #     words = [word for word, sim in top_words]
+    #     words = [word.replace(' ', '-') for word in words]
+    #     sims = [sim for word, sim in top_words]
+    #     X_top = model.wv[words]
     #
+    #     # Compute similarities between query and top 100 words
+    #     sims_query_top = sims  # print(sims_query_top)
+    # except Exception as e:
+    #     print("Error:", e)
     #
     #
+    # # Generate a 3D scatter plot of word embeddings using Plotly
+    # fig = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
+    #                     color_continuous_scale="RdYlGn", hover_name=words, hover_data={"color": sims_query_top})
     #
+    # # Change background color to black
+    # fig.update_layout(scene=dict(bgcolor='#CCFFFF'))
     #
+    # # Change color of text to white
+    # fig.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+    #                              yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+    #                              zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
     #
+    # fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
+    # fig.update_layout(title=dict(text=f"Word embedding map for {query} in {database_name} PubMed corpus", x=0.5, y=0.95,
+    #                              xanchor='center', yanchor='top', font=dict(color='black')),
+    #                   scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
+    # fig.update_coloraxes(colorbar_title="Similarity with query")
     #
+    # # Represent query as a large red diamond
+    # fig.add_trace(go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
+    #                            marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query, showlegend=False))
     #
+    # # Add label for the query above the diamond
+    # fig.add_trace(
+    #     go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text', text=[query],
+    #                  textposition='bottom center', textfont=dict(color='blue', size=10), hoverinfo='none', showlegend=False))
     #
+    # # Add circles for the top 50 similar words
+    # fig.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
+    #                            marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
+    #                            hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
+    #                            text=words, customdata=sims, name=''))
     #
+    # fig.update(layout_coloraxis_showscale=True)
+    # fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
+    # fig.update_annotations(visible=False)
     #
+    # st.plotly_chart(fig, use_container_width=True)
     st.markdown("---")
         df1 = df1[m]
         df1.rename(columns={'Word': 'Genes'}, inplace=True)
         df_len = len(df1)
+        # print(len(df1))
         # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
         #             f"<span style='color:red; font-style: italic;'>proteins</span> contextually "
         else:
             st.warning(
                 f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
+        try:
+            # Get the top 50 similar genes to the query
+            value_gene = min(df_len, 50)
+            top_words = model.wv.most_similar_cosmul(query, topn=value_gene)
+            words = df11.head(value_gene).index
+            words = [word.replace(' ', '-') for word in words]
+            # print(words)
+            sims = df4.head(value_gene)["SIMILARITY"].tolist()
+            # print(sims)
+            X_top = model.wv[words]  # print(X_top)
+        except Exception as e:
+            print("Error:", e)
+        # Remove the text "Similarity Score" from each element in the sims list
+        sims_query_top = [float(sim.split()[-1]) for sim in sims]
+        # print(sims_query_top)
+        # Generate a 3D scatter plot of word embeddings using Plotly
+        fig2 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
+                                 color_continuous_scale="RdYlGn", hover_name=words,
+                                 hover_data={"color": sims_query_top})
+        # Set the 3D scene background to light cyan (#CCFFFF)
+        fig2.update_layout(scene=dict(bgcolor='#CCFFFF'))
+        # Give each 3D axis a light-cyan backdrop and a blue axis color
+        fig2.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+                                          yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+                                          zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
+        fig2.update_traces(
+                hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
+        fig2.update_layout(
+                title=dict(text=f"Word embedding map for {query} in {database_name} PubMed corpus", x=0.5, y=0.95,
+                           xanchor='center', yanchor='top', font=dict(color='black')),
+                scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
+        fig2.update_coloraxes(colorbar_title="Similarity with query")
+        # Represent query as a large red diamond
+        fig2.add_trace(
+                go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
+                             marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
+                             showlegend=False))
+        # Add label for the query above the diamond
+        fig2.add_trace(
+                go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text',
+                             text=[query], textposition='bottom center', textfont=dict(color='blue', size=10),
+                             hoverinfo='none', showlegend=False))
+        # Add circles for the top 50 similar words
+        fig2.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
+                                        marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
+                                        hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
+                                        text=words, customdata=sims, name=''))
+        fig2.update(layout_coloraxis_showscale=True)
+        fig2.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
+        fig2.update_annotations(visible=False)
+        st.plotly_chart(fig2, use_container_width=True)
         st.markdown("---")
         # print()
         # print("Human genes similar to " + str(query))
         # No slider here: value_drug is set to the smaller of the number of rows in df1 and 100
         value_drug = min(df1.shape[0], 100)
+        # print(value_drug)
         # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
         #             f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
         #             f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
         else:
             st.warning(
                 f"This selection exceeds the number of similar drugs related to {query} within the {database_name} corpus, please choose a lower number")
+        # try:
+        #     value_drug = min(df_len, 50)
+        #     top_words = model.wv.most_similar_cosmul(query, topn=value_drug)
+        #     # print(top_words)
+        #     words = df13.head(value_drug).index
+        #     words = [word.replace(' ', '-') for word in words]
+        #     # print(words)
+        #     sims = df6.head(value_drug)["SIMILARITY"].tolist()
+        #     # print(sims)
+        #     X_top = model.wv[words]
+        # except Exception as e:
+        #     print("Error:", e)
         #
         #
+        # # Remove the text "Similarity Score" from each element in the sims list
+        # sims_query_top = [float(sim.split()[-1]) for sim in sims]
+        # # print(sims_query_top)
         #
+        # # Generate a 3D scatter plot of word embeddings using Plotly
+        # fig4 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
+        #                      color_continuous_scale="RdYlGn", hover_name=words, hover_data={"color": sims_query_top})
         #
+        # # Change background color to black
+        # fig4.update_layout(scene=dict(bgcolor='#CCFFFF'))
         #
+        # # Change color of text to white
+        # fig4.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+        #                               yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+        #                               zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
         #
+        # fig4.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
+        # fig4.update_layout(
+        #     title=dict(text=f"Word embedding map for {query} in {database_name} PubMed corpus", x=0.5, y=0.95,
+        #                xanchor='center', yanchor='top', font=dict(color='black')),
+        #     scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
+        # fig4.update_coloraxes(colorbar_title="Similarity with query")
         #
+        # # Represent query as a large red diamond
+        # fig4.add_trace(
+        #     go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
+        #                  marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
+        #                  showlegend=False))
         #
+        # # Add label for the query above the diamond
+        # fig4.add_trace(go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text',
+        #                             text=[query], textposition='bottom center', textfont=dict(color='blue', size=10),
+        #                             hoverinfo='none', showlegend=False))
         #
+        # # Add circles for the top 50 similar words
+        # fig4.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
+        #                             marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
+        #                             hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
+        #                             text=words, customdata=sims, name=''))
         #
+        # fig4.update(layout_coloraxis_showscale=True)
+        # fig4.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
+        # fig4.update_annotations(visible=False)
         #
+        # st.plotly_chart(fig4, use_container_width=True)
         # st.markdown("---")
         st.markdown("---")
         else:
             st.warning(
                 f"This selection exceeds the number of similar pythochemicals related to {query} within the {database_name} corpus, please choose a lower number")
+        # try:
+        #     value_phyto = min(df_len, 50)
+        #     top_words = model.wv.most_similar_cosmul(query, topn=value_phyto)
+        #     words = df15.head(value_phyto).index
+        #     words = [word.replace(' ', '-') for word in words]
+        #     # print(words)
+        #     sims = df8.head(value_phyto)["SIMILARITY"].tolist()
+        #     # print(sims)
+        #     X_top = model.wv[words]  # print(X_top)
+        # except Exception as e:
+        #     print("Error:", e)
+        #
+        # # Remove the text "Similarity Score" from each element in the sims list
+        # sims_query_top = [float(sim.split()[-1]) for sim in sims]
+        # # print(sims_query_top)
+        #
+        # # Generate a 3D scatter plot of word embeddings using Plotly
+        # fig4 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
+        #                      color_continuous_scale="RdYlGn", hover_name=words, hover_data={"color": sims_query_top})
+        #
+        # # Change background color to black
+        # fig4.update_layout(scene=dict(bgcolor='#CCFFFF'))
+        #
+        # # Change color of text to white
+        # fig4.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+        #                               yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+        #                               zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
+        #
+        # fig4.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
+        # fig4.update_layout(
+        #     title=dict(text=f"Word embedding map for {query} in {database_name} PubMed corpus", x=0.5, y=0.95,
+        #                xanchor='center', yanchor='top', font=dict(color='black')),
+        #     scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
+        # fig4.update_coloraxes(colorbar_title="Similarity with query")
+        #
+        # # Represent query as a large red diamond
+        # fig4.add_trace(
+        #     go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
+        #                  marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
+        #                  showlegend=False))
+        #
+        # # Add label for the query above the diamond
+        # fig4.add_trace(go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text',
+        #                             text=[query], textposition='bottom center', textfont=dict(color='blue', size=10),
+        #                             hoverinfo='none', showlegend=False))
+        #
+        # # Add circles for the top 50 similar words
+        # fig4.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
+        #                             marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
+        #                             hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
+        #                             text=words, customdata=sims, name=''))
+        #
+        # fig4.update(layout_coloraxis_showscale=True)
+        # fig4.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
+        # fig4.update_annotations(visible=False)
+        #
+        # st.plotly_chart(fig4, use_container_width=True)
+        # st.markdown("---")
         st.markdown("---")
         # print()
         # print("Human genes similar to " + str(query))
         df1 = table.copy()
         else:
             st.warning(
                 f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
+        # try:
+        #     value_compound = min(df_len, 50)
+        #     top_words = model.wv.most_similar_cosmul(query, topn=value_compound)
+        #     words = df12.head(value_compound).index
+        #     words = [word.replace(' ', '-') for word in words]
+        #
+        #     sims = df5.head(value_compound)["SIMILARITY"].tolist()
+        #     # print(sims)
+        #     X_top = model.wv[words]  # print(X_top)
+        # except Exception as e:
+        #     print("Error:", e)
+        #
+        # # Remove the text "Similarity Score" from each element in the sims list
+        # sims_query_top = [float(sim.split()[-1]) for sim in sims]
+        # # print(sims_query_top)
+        #
+        # # Generate a 3D scatter plot of word embeddings using Plotly
+        # fig5 = px.scatter_3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], text=words, color=sims_query_top,
+        #                      color_continuous_scale="RdYlGn", hover_name=words, hover_data={"color": sims_query_top})
+        #
+        # # Change background color to black
+        # fig5.update_layout(scene=dict(bgcolor='#CCFFFF'))
+        #
+        # # Change color of text to white
+        # fig5.update_layout(scene=dict(xaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+        #                               yaxis=dict(backgroundcolor='#CCFFFF', color='blue'),
+        #                               zaxis=dict(backgroundcolor='#CCFFFF', color='blue')))
+        #
+        # fig5.update_traces(hovertemplate='<b>%{hovertext}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>')
+        # fig5.update_layout(
+        #     title=dict(text=f"Word embedding map for {query} in {database_name} PubMed corpus", x=0.5, y=0.95,
+        #                xanchor='center', yanchor='top', font=dict(color='black')),
+        #     scene=dict(xaxis_title="Dimension 1", yaxis_title="Dimension 2", zaxis_title="Dimension 3"))
+        # fig5.update_coloraxes(colorbar_title="Similarity with query")
+        #
+        # # Represent query as a large red diamond
+        # fig5.add_trace(
+        #     go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='markers',
+        #                  marker=dict(size=7, color='black', symbol='diamond'), name=query, hovertext=query,
+        #                  showlegend=False))
+        #
+        # # Add label for the query above the diamond
+        # fig5.add_trace(go.Scatter3d(x=[model.wv[query][0]], y=[model.wv[query][1]], z=[model.wv[query][2]], mode='text',
+        #                             text=[query], textposition='bottom center', textfont=dict(color='blue', size=10),
+        #                             hoverinfo='none', showlegend=False))
+        #
+        # # Add circles for the top 50 similar words
+        # fig5.add_trace(go.Scatter3d(x=X_top[:, 0], y=X_top[:, 1], z=X_top[:, 2], mode='markers',
+        #                             marker=dict(size=2, color=sims_query_top, colorscale='RdYlGn', symbol='circle'),
+        #                             hovertemplate='<b>%{text}</b><br>Similarity score: %{customdata[0]:.2f}<extra></extra>',
+        #                             text=words, customdata=sims, name=''))
+        #
+        # fig5.update(layout_coloraxis_showscale=True)
+        # fig5.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
+        # fig5.update_annotations(visible=False)
+        #
+        # st.plotly_chart(fig5, use_container_width=True)
+        # st.markdown("---")
         # import os
         # from datasets import Dataset
         5. [Word2Vec: How to Implement Word2Vec in Python](https://www.youtube.com/watch?v=ISPId9Lhc1g&t=6s) - A YouTube video by Data Talks demonstrating how to implement Word2Vec in Python using the Gensim library.
         """)
 # else:
 #     st.error("The password you entered is incorrect.")