File size: 3,657 Bytes
1699569
 
 
 
 
e5a12b8
 
 
1699569
 
a6d026f
 
 
f192d73
 
 
 
afb8bf9
 
 
 
a6d026f
 
 
 
2bba935
 
 
 
 
 
 
 
 
 
 
 
 
 
8b5ed16
1699569
0916aa5
1699569
2bba935
 
b2912c4
e5a12b8
1699569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5a12b8
 
 
 
8b5ed16
e5a12b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1699569
4b2cc15
1699569
 
 
4b2cc15
1699569
 
 
e5a12b8
 
1699569
e5a12b8
 
8b5ed16
e5a12b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1699569
65ce061
2f21339
5ba2c0e
e5a12b8
 
 
 
1699569
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import streamlit as st
import time
import json
from gensim.models import Word2Vec
import pandas as pd
import matplotlib.pyplot as plt
import squarify
import numpy as np

# Dark theme for the whole page: black background with white text.
_PAGE_THEME_CSS = """
    <style>
    body {
        background-color: #000000;
        color: #ffffff;
    }
    .stApp {
        background-color: #000000;
        color: #ffffff;
    }
    </style>
    """

# Keep the text-input label and the typed text readable on the dark theme.
_TEXT_INPUT_CSS = """
    <style>
    .stTextInput div label {
        color: #ffffff !important;
    }
    .stTextInput div input[type="text"] {
        color: #ffffff !important;
    }
    </style>
    """

# Inject each stylesheet as raw HTML (one markdown element per stylesheet).
for _css_snippet in (_PAGE_THEME_CSS, _TEXT_INPUT_CSS):
    st.markdown(_css_snippet, unsafe_allow_html=True)

# Page title followed by a short introductory line.
st.header(":white[My Streamlit App with HTML and CSS]")
st.write(":white[**This is my Streamlit app with HTML and CSS formatting.**]")

# Read the search keyword typed by the user (empty string until something is entered).
text_input_value = st.text_input("Enter some text", "")
# Normalise the keyword: trim surrounding whitespace and lower-case it, so that
# an accidental leading/trailing space does not turn an exact-match vocabulary
# lookup into a miss.
query = text_input_value.strip().lower()

def _render_treemap(labels, weights, colormap, fontsize):
    """Draw a treemap on its own figure and display it in the Streamlit page.

    Args:
        labels: Text printed inside each rectangle, one entry per weight.
        weights: Positive numbers controlling the relative rectangle areas.
        colormap: Matplotlib colormap (e.g. ``plt.cm.Greens``); it is sampled
            between 0.05 and 0.5 to shade the rectangles.
        fontsize: Font size used for the rectangle labels.
    """
    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure": that global state outlives a single script run, so artists left
    # over from an earlier plot could otherwise show up in a later one.
    fig, ax = plt.subplots()
    shades = list(colormap(np.linspace(0.05, .5, len(weights))))
    squarify.plot(
        sizes=weights,
        label=labels,
        color=shades,
        pad=.005,
        text_kwargs={'fontsize': fontsize},
        ax=ax,
    )
    ax.axis('off')
    st.pyplot(fig)
    # Release the figure so figures do not accumulate across reruns.
    plt.close(fig)


if query:
    # NOTE(review): the model is loaded from disk on every rerun; consider
    # caching it if the installed Streamlit version offers a resource cache.
    model = Word2Vec.load("pubmed_model_clotting")

    if query not in model.wv.key_to_index:
        # Looking up an unknown word raises KeyError inside gensim; report it
        # to the user instead of crashing the app.
        st.warning(f"'{query}' was not found in the model vocabulary. Please try another keyword.")
    else:
        # Ranked (word, similarity) pairs for the query, best match first.
        similar = model.wv.most_similar_cosmul(query, topn=10000)
        table = pd.DataFrame(similar, columns=['Word', 'SIMILARITY'])
        table.index.name = 'Rank'

        print()
        print("Similarity to " + str(query))
        pd.set_option('display.max_rows', None)
        print(table.head(50))
        table.head(10).to_csv("clotting_sim1.csv", index=True)

        st.header(f":white[Similar Words to {query}]")

        # Treemap of the 20 closest words; area is the reciprocal of the
        # 1-based rank, so better matches get larger rectangles.
        top_words = table.head(20)
        word_weights = (1.0 / (top_words.index.to_numpy() + 1)).tolist()
        _render_treemap(top_words['Word'].tolist(), word_weights, plt.cm.Greens, 6)

        print()
        print("Human genes similar to " + str(query))
        gene_symbols = pd.read_csv('Human_Genes.csv')
        # Keep only the similar words that appear in the gene-symbol list.
        # rename() returns a new frame, so the column assignment below does
        # not write into a slice of `table`.
        genes = table[table['Word'].isin(gene_symbols['symbol'])].rename(
            columns={'Word': 'Human Gene'}
        )
        genes['Human Gene'] = genes['Human Gene'].str.upper()
        print(genes.head(50))
        print()
        genes.head(50).to_csv("clotting_sim2.csv", index=True, header=False)

        st.header(f":white[Similar Genes to {query}]")

        top_genes = genes.head(20)
        if top_genes.empty:
            st.info("No human genes were found among the similar words.")
        else:
            # The Rank index is 0-based, so add 1 before taking the reciprocal:
            # this avoids a division by zero when the best match is a gene and
            # uses the same weighting as the word treemap above.
            gene_weights = (1.0 / (top_genes.index.to_numpy() + 1)).tolist()
            _render_treemap(top_genes['Human Gene'].tolist(), gene_weights, plt.cm.Blues, 8)



# findRelationships(query, df)







# model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
# similar_words = model.most_similar(word)
# output = json.dumps({"word": word, "similar_words": similar_words})
# st.write(output)