Spaces:

jfataphd
/

OncoDigger

Running

App Files Files Community

jfataphd committed on Feb 26, 2023

Commit

f21967a

1 Parent(s): 82bee27

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -44

app.py CHANGED Viewed

@@ -8,8 +8,7 @@ import squarify
 import numpy as np
 # Define the HTML and CSS styles
-st.markdown(
-    """
     <style>
     body {
         background-color: #EBF5FB;
@@ -20,33 +19,38 @@ st.markdown(
         # color: #ffffff;
     }
     </style>
-    """,
-    unsafe_allow_html=True
-)
 st.header("Word2Vec App for Clotting Pubmed Database.")
-text_input_value = st.text_input("Enter one term to search within the Clotting database")
 query = text_input_value
 query = query.lower()
 # query = input ("Enter your keyword(s):")
 if query:
-    bar = st.progress(0)
-    time.sleep(.2)
-    st.caption(":LightSkyBlue[searching 40123 PubMed abstracts]")
-    for i in range(10):
-        bar.progress((i+1)*10)
-        time.sleep(.1)
-    model = Word2Vec.load("pubmed_model_clotting")  # you can continue training with the loaded model!
-    words = list(model.wv.key_to_index)
-    X = model.wv[model.wv.key_to_index]
-    model2 = model.wv[query]
-    df = pd.DataFrame(X)
-# def findRelationships(query, df):
     table = model.wv.most_similar_cosmul(query, topn=10000)
     table = (pd.DataFrame(table))
     table.index.name = 'Rank'
@@ -70,7 +74,8 @@ if query:
     color = [cmap[i] for i in range(len(sizes))]
     short_table.set_index('Word', inplace=True)
-    squarify.plot(sizes=sizes, label=short_table.index.tolist(), color=color, edgecolor="#EBF5FB", text_kwargs={'fontsize': 10})
     # # plot the treemap using matplotlib
     plt.axis('off')
     fig = plt.gcf()
@@ -80,11 +85,7 @@ if query:
     plt.clf()
     csv = table.head(100).to_csv().encode('utf-8')
-    st.download_button(
-        label="download top 100 words (csv)",
-        data=csv,
-        file_name='clotting_words.csv',
-        mime='text/csv')
     # st.write(short_table)
     #
@@ -104,14 +105,15 @@ if query:
     st.subheader(f"Top 10 Genes closely related to {query}")
     df10 = df1.head(10)
-    df10.index = 1/df10.index
     sizes = df10.index.tolist()
     cmap2 = plt.cm.Blues(np.linspace(0.05, .5, len(sizes)))
     color2 = [cmap2[i] for i in range(len(sizes))]
     df10.set_index('Human Gene', inplace=True)
-    squarify.plot(sizes=sizes, label=df10.index.tolist(), color=color2, edgecolor="#EBF5FB", text_kwargs={'fontsize': 12})
     #
     # # plot the treemap using matplotlib
@@ -124,24 +126,11 @@ if query:
     st.pyplot(fig2)
     csv = df1.head(100).to_csv().encode('utf-8')
-    st.download_button(
-        label="download top 100 genes (csv)",
-        data=csv,
-        file_name='clotting_genes.csv',
-        mime='text/csv')
 # findRelationships(query, df)
 # model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
 # similar_words = model.most_similar(word)
 # output = json.dumps({"word": word, "similar_words": similar_words})

 import numpy as np
 # Define the HTML and CSS styles
+st.markdown("""
     <style>
     body {
         background-color: #EBF5FB;
         # color: #ffffff;
     }
     </style>
+    """, unsafe_allow_html=True)
 st.header("Word2Vec App for Clotting Pubmed Database.")
+text_input_value = st.text_input("Enter one term to search within the Clotting database", max_chars=50)
 query = text_input_value
 query = query.lower()
 # query = input ("Enter your keyword(s):")
 if query:
+    if query.isalpha():
+        bar = st.progress(0)
+        time.sleep(.2)
+        st.caption(":LightSkyBlue[searching 40123 PubMed abstracts]")
+        for i in range(10):
+            bar.progress((i + 1) * 10)
+            time.sleep(.1)
+    else:
+        st.write('Please omit numbers in term')
+    try:
+        model = Word2Vec.load("pubmed_model_clotting")  # you can continue training with the loaded model!
+        words = list(model.wv.key_to_index)
+        X = model.wv[model.wv.key_to_index]
+        model2 = model.wv[query]
+        df = pd.DataFrame(X)
+    except:
+        st.error("Term occurrence is too low - please try another term")
+        st.stop()
+    # def findRelationships(query, df):
     table = model.wv.most_similar_cosmul(query, topn=10000)
     table = (pd.DataFrame(table))
     table.index.name = 'Rank'
     color = [cmap[i] for i in range(len(sizes))]
     short_table.set_index('Word', inplace=True)
+    squarify.plot(sizes=sizes, label=short_table.index.tolist(), color=color, edgecolor="#EBF5FB",
+                  text_kwargs={'fontsize': 10})
     # # plot the treemap using matplotlib
     plt.axis('off')
     fig = plt.gcf()
     plt.clf()
     csv = table.head(100).to_csv().encode('utf-8')
+    st.download_button(label="download top 100 words (csv)", data=csv, file_name='clotting_words.csv', mime='text/csv')
     # st.write(short_table)
     #
     st.subheader(f"Top 10 Genes closely related to {query}")
     df10 = df1.head(10)
+    df10.index = 1 / df10.index
     sizes = df10.index.tolist()
     cmap2 = plt.cm.Blues(np.linspace(0.05, .5, len(sizes)))
     color2 = [cmap2[i] for i in range(len(sizes))]
     df10.set_index('Human Gene', inplace=True)
+    squarify.plot(sizes=sizes, label=df10.index.tolist(), color=color2, edgecolor="#EBF5FB",
+                  text_kwargs={'fontsize': 12})
     #
     # # plot the treemap using matplotlib
     st.pyplot(fig2)
     csv = df1.head(100).to_csv().encode('utf-8')
+    st.download_button(label="download top 100 genes (csv)", data=csv, file_name='clotting_genes.csv', mime='text/csv')
 # findRelationships(query, df)
 # model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
 # similar_words = model.most_similar(word)
 # output = json.dumps({"word": word, "similar_words": similar_words})