Browse files
@@ -1,8 +1,10 @@
1 |
import streamlit as st
2 |
import time
3 |
import json
4 |
from gensim.models import Word2Vec
5 |
import pandas as pd
6 |
import matplotlib.pyplot as plt
7 |
import squarify
8 |
import numpy as np
@@ -12,12 +14,13 @@ import random
12 |
import as px
13 |
14 |
15 |
16 |
17 |
layout="wide", #centered
18 |
19 |
20 |
'About': "
21 |
22 |
23 |
@@ -44,38 +47,70 @@ st.markdown("""
44 |
45 |
""", unsafe_allow_html=True)
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
st.header(":red[*F*]ast :red[*A*]cting :red[*T*]ext :red[*A*]nalysis (:red[*FATA*]) 4 Science")
66 |
67 |
st.subheader("Uncovering knowledge through Natural Language Processing (NLP)")
68 |
69 |
70 |
st.header(f":blue[{database_name} Pubmed corpus.]")
71 |
text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
72 |
query = text_input_value
73 |
query = query.lower()
74 |
query = re.sub("[,.?!&*;:
75 |
76 |
77 |
78 |
79 |
if query:
80 |
bar = st.progress(0)
81 |
@@ -86,313 +121,839 @@ if query:
86 |
87 |
88 |
# try:
89 |
model = Word2Vec.load(model_used) # you can continue training with the loaded model!
90 |
words = list(model.wv.key_to_index)
91 |
X = model.wv[model.wv.key_to_index]
92 |
model2 = model.wv[query]
93 |
df = pd.DataFrame(X)
94 |
95 |
# except:
96 |
# st.error("Term occurrence is too low - please try another term")
97 |
# st.stop()
98 |
99 |
# def findRelationships(query, df):
100 |
101 |
102 |
table = model.wv.most_similar_cosmul(query, topn=10000)
103 |
table = (pd.DataFrame(table))
104 | = 'Rank'
105 |
table.columns = ['Word', 'SIMILARITY']
106 |
107 |
# print()
108 |
# print("Similarity to " + str(query))
109 |
pd.set_option('display.max_rows', None)
110 |
table2 = table.copy()
111 |
# print(table.head(50))
112 |
# table.head(10).to_csv("clotting_sim1.csv", index=True)
113 |
# short_table = table.head(50)
114 |
# print(table)
115 |
116 |
117 |
118 |
119 |
f"<b><p style='font-family: Arial; font-size: 20px;'>
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
# label = short_table.index.tolist()
145 |
# print(short_table.index)
146 |
table2["SIMILARITY"] = 'Similarity Score ' + table2.head(10)["SIMILARITY"].round(2).astype(str)
147 |
rank_num = list(short_table.index.tolist())
148 |
# avg_size = sum(sizes) / len(short_table.index)
149 |
df = short_table
150 |
151 |
# Define the `text` column for labels and `href` column for links
152 |
df['text'] = short_table.index
153 |
154 |
df['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
155 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
156 |
df['href2'] = [f'' + c for c in short_table.index]
157 |
158 |
df.loc[:,'database'] = database_name
159 |
160 |
161 |
# print(sizes)
162 |
# '{0} in {1}'.format(unicode(, 'utf-8'), unicode(self.publication, 'utf-8'))
163 |
# Create the treemap using `px.treemap`
164 |
fig = px.treemap(df, path=[short_table.index], values=sizes, custom_data=['href', 'text', 'database', 'href2'],
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
# st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
185 |
186 |
187 |
188 |
189 |
190 |
191 |
f"This selection exceeds the number of similar words related to {query} within the {database_name} corpus, please choose a lower number")
192 |
193 |
194 |
# st.write(short_table)
195 |
196 |
197 |
# print()
198 |
# print("Human genes similar to " + str(query))
199 |
df1 = table.copy()
200 |
df2 = pd.read_csv('
201 |
m = df1.Word.isin(df2.
202 |
df1 = df1[m]
203 |
df1.rename(columns={'Word': '
204 |
205 |
# print(df1.head(50))
206 |
# print()
207 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
208 |
# time.sleep(2)
209 |
# Create the slider with increments of 5 up to 100
210 |
211 |
212 |
213 |
f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
214 |
f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
215 |
216 |
value_gene = st.slider("Gene", 0, 100, step=5)
217 |
if value_gene > 0:
218 |
# st.subheader(f"Top {value} genes closely related to {query}: "
219 |
# f"Click on the Pubmed and NCBI links for more gene information")
220 |
221 |
222 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
223 |
f"</span>genes similar to "
224 |
f"<span style='color:red; font-style: italic;'>{query}:</span> Click on the squares to expand and the Pubmed and NCBI links for more gene information</span></p></b>",
225 |
226 |
227 |
df10 = df1.head(value_gene).copy()
228 |
df10.index = (1 / df10.index)*10000
229 |
sizes = df10.index.tolist()
230 |
df10.set_index('Human Gene', inplace=True)
231 |
232 |
df3 = df1.copy()
233 |
df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value_gene)["SIMILARITY"].round(2).astype(str)
234 |
235 |
df3 = df3.rename(columns={'Human Gene': 'symbol2'})
236 |
# Use df.query to get a subset of df1 based on ids in df2
237 |
subset = df3.head(value_gene).query('symbol2 in @df2.symbol2')
238 |
# Use merge to join the two DataFrames on id
239 |
result = pd.merge(subset, df2, on='symbol2')
240 |
# Show the result
241 |
# print(result)
242 |
# label = df10.index.tolist()
243 |
# df2 = df10
244 |
# print(df2)
245 |
246 |
# Define the `text` column for labels and `href` column for links
247 |
df10['text'] = df10.index
248 |
df10['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
249 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10['text']]
250 |
df10['href2'] = [f'' + c for c in df10['text']]
251 |
252 |
df10['name'] = [c for c in result['Approved name']]
253 |
assert isinstance(df10, object)
254 |
df10.loc[:,'database'] = database_name
255 |
256 |
# print(df['name'])
257 |
258 |
# Create the treemap using `px.treemap`
259 |
fig = px.treemap(df10, path=[df10['text']], values=sizes,
260 |
custom_data=['href', 'name', 'database', 'href2', 'text'], hover_name=(df3.head(value_gene)['SIMILARITY']))
261 |
262 |
263 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
264 |
265 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
266 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
267 |
texttemplate="<b><span style='font-family: Arial; font-size: 20px;'>%{customdata[4]}</span></b><br><span "
268 |
"style='font-family: Arial; font-size: 15px;'>%{customdata[1]}<br>"
269 |
"<a href='%{customdata[0]}'>PubMed"
270 |
"</a><br><a href='%{customdata[3]}'>NCBI"
271 |
272 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
273 |
# # display the treemap in Streamlit
274 |
# with treemap2:
275 |
276 |
# st.pyplot(fig2)
277 |
st.plotly_chart(fig, use_container_width=True)
278 |
279 |
st.caption("Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC):")
280 |
st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
281 |
282 |
283 |
284 |
csv = df1.head(value_gene).to_csv().encode('utf-8')
285 |
st.download_button(label=f"download top {value_gene} genes (csv)", data=csv, file_name=f'{database_name}_genes.csv',
286 |
287 |
288 |
289 |
290 |
291 |
f"This selection exceeds the number of similar genes related to {query} within the {database_name} corpus, please choose a lower number")
292 |
293 |
294 |
# st.
295 |
296 |
297 |
# print()
298 |
# print("Human genes similar to " + str(query))
299 |
df1 = table.copy()
300 |
df2 = pd.read_csv('
301 |
m = df1.Word.isin(df2.
302 |
df1 = df1[m]
303 |
df1.rename(columns={'Word': '
304 |
# print(df1)
305 |
df_len = len(df1)
306 |
# df1["
307 |
# print(df1.head(50))
308 |
# print()
309 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
310 |
# time.sleep(2)
311 |
# Create the slider with increments of 5 up to 100
312 |
313 |
314 |
f"<b><p style='font-family: Arial; font-size: 20px;'>
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
369 |
370 |
"<a href='%{customdata[0]}'>PubMed"
371 |
"</a><br><a href='%{customdata[2]}'>Wikipedia"
372 |
373 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
374 |
# # display the treemap in Streamlit
375 |
# with treemap2:
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
st.download_button(label=f"download top {value_protein} proteins (csv)", data=csv, file_name=f'{database_name}_genes.csv',
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
st.subheader("Cancer-related videos")
394 |
if query:
395 |
396 |
search_keyword = {query}
397 |
html = urllib.request.urlopen("")
398 |
html2 = urllib.request.urlopen("")
@@ -418,15 +979,30 @@ if query:
418 |
419 |
c1, c2, c3 = st.columns(3)
420 |
421 |
422 |
with c1:
423 |
424 |
with c2:
425 |
426 |
with c3:
427 |
428 |
429 |
430 |
431 |
432 |
1 |
import streamlit as st
2 |
import time
3 |
import concurrent.futures
4 |
import json
5 |
from gensim.models import Word2Vec
6 |
import pandas as pd
7 |
import threading
8 |
import matplotlib.pyplot as plt
9 |
import squarify
10 |
import numpy as np
14 |
import as px
15 |
16 |
17 |
18 |
19 |
layout="wide", #centered
20 |
21 |
22 |
'About': "Abstractalytics is a Natural Language Processing (NLP) that harnesses Word2Vec to mine"
23 |
" insight from pubmed abstracts. Created by Jimmie E. Fata, PhD"
24 |
25 |
26 |
47 |
48 |
""", unsafe_allow_html=True)
49 |
50 |
51 |
52 |
st.subheader("*A web app designed to explore :red[*PubMed abstracts*] for deeper understanding and fresh insights, driven "
53 |
"by Natural Language Processing (NLP) techniques.*")
54 |
55 |
def custom_subheader(text, identifier, font_size):
    """Render ``text`` as an HTML ``<h3>`` heading through Streamlit.

    Args:
        text: Content placed inside the heading; inserted into the markup
            as-is, without escaping.
        identifier: Value used for the heading's ``id`` attribute.
        font_size: Font size in pixels, interpolated into the inline style.
    """
    heading_html = f"<h3 id='{identifier}' style='font-size: {font_size}px;'>{text}</h3>"
    # The markup is passed with unsafe_allow_html=True so the <h3> tag is
    # rendered as HTML rather than shown as literal text.
    st.markdown(heading_html, unsafe_allow_html=True)
57 |
58 |
# Welcome/introduction text, rendered through custom_subheader with the
# element id "unique-id" and a font size of 18.
custom_subheader(
    "Welcome to our innovative web2vec app designed to unlock the wealth of knowledge and insights hidden "
    "within PubMed abstracts! To begin, simply select a corpus that interests you. Next, enter a single keyword "
    "you wish to explore within the corpus. Abstractalytics' powerful Natural Language "
    "Processing (NLP) algorithms will analyze the chosen corpus and present you with a list of top words, "
    "genes, drugs, phytochemicals, and compounds that are contextually and semantically related "
    "to your input. This advanced text-mining technique enables you to explore and understand complex "
    "relationships, uncovering new discoveries and connections in your field of research across a massive "
    "amount of abstracts. Dive in and enjoy the exploration! More oncology-related corpora coming soon.",
    "unique-id",
    18,
)
66 |
67 |
68 |
69 |
#Define the correct password
70 |
71 |
72 |
# Define a function to check if the password is correct
73 |
# def authenticate(password):
74 |
# if password == CORRECT_PASSWORD:
75 |
# return True
76 |
# else:
77 |
# return False
78 |
79 |
# # Create a Streamlit input field for the password
80 |
# password = st.text_input("Enter password:", type="password")
81 |
82 |
# # If the password is correct, show the app content
83 |
# if authenticate(password):
84 |
opt ="Select a PubMed Corpus",
85 |
86 |
'Breast Cancer corpus', 'Lung Cancer corpus'))
87 |
# if opt == "Clotting corpus":
88 |
# model_used = ("pubmed_model_clotting")
89 |
# num_abstracts = 45493
90 |
# database_name = "Clotting"
91 |
# if opt == "Neuroblastoma corpus":
92 |
# model_used = ("pubmed_model_neuroblastoma")
93 |
# num_abstracts = 29032
94 |
# database_name = "Neuroblastoma"
95 |
# Settings for each selectable corpus, keyed by the option label.
# Each value is (model_used, num_abstracts, database_name).
_corpus_settings = {
    "Breast Cancer corpus": ("pubmed_model_breast_cancer2", 290320, "Breast_cancer"),
    "Lung Cancer corpus": ("lung_cancer_pubmed_model", 210320, "Lung_cancer"),
}

# Any other selection leaves the three names unassigned here.
if opt in _corpus_settings:
    model_used, num_abstracts, database_name = _corpus_settings[opt]
103 |
104 |
st.header(f":blue[{database_name} Pubmed corpus.]")
105 |
text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
106 |
# Normalise the raw search term into the form used for the model lookup:
# trimmed, lower-cased, punctuation removed, inner spaces hyphenated.
# Trimming first matters: otherwise a stray leading/trailing space turns into
# a hyphen ("p53 " -> "p53-"), and a whitespace-only input becomes the truthy
# string "-" instead of an empty (falsy) query.
query = text_input_value.strip().lower()
query = re.sub("[,.?!&*;:]", "", query)
# Trim again because removing punctuation can expose an edge space ("p53 !").
query = re.sub(" ", "-", query.strip())
110 |
# matches = [" "]
111 |
# if any([x in query for x in matches]):
112 |
# st.write("Please only enter one term or a term without spaces")
113 |
# # query = input ("Enter your keyword(s):")
114 |
if query:
115 |
bar = st.progress(0)
116 |
121 |
122 |
123 |
# try:
124 |
model = Word2Vec.load(f"{model_used}") # you can continue training with the loaded model!
125 |
words = list(model.wv.key_to_index)
126 |
X = model.wv[model.wv.key_to_index]
127 |
# print(model.wv['bfgf'])
128 |
model2 = model.wv[query]
129 |
# print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
130 |
df = pd.DataFrame(X)
131 |
132 |
def get_compound_ids(compound_names):
    """Look up identifiers for several compound names concurrently.

    Args:
        compound_names: Iterable of compound names to look up.

    Returns:
        A list holding one lookup result per name, in the same order as
        ``compound_names``.
    """
    # NOTE(review): the callable passed to the executor was missing from this
    # statement; ``get_compound_id`` (the single-name lookup defined in this
    # file) is assumed to be the intended worker - confirm.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Executor.map yields results in input order, so positions line up
        # with compound_names.
        compound_ids = list(executor.map(get_compound_id, compound_names))
    return compound_ids
136 |
137 |
138 |
import requests
139 |
140 |
141 |
def get_compound_id(compound_name):
    """Look up the identifier for a single compound name over HTTP.

    Sends a GET request to the URL built from ``compound_name`` and, on an
    HTTP 200 response, takes the first tab-separated field of the first line
    of the response body.

    Args:
        compound_name: Name of the compound to look up.

    Returns:
        The first tab-separated field of the first response line, or ``None``
        when the request fails, the status code is not 200, or the first line
        of the body is empty.
    """
    # NOTE(review): the URL consists of the compound name only (no scheme or
    # host is visible here) - confirm the endpoint prefix.
    url = f"{compound_name}"
    try:
        # Without a timeout a stalled server would block this call (and the
        # worker thread running it) indefinitely.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Treat a failed request like any other unsuccessful lookup.
        return None
    if response.status_code != 200:
        return None
    first_line = response.text.split('\n')[0]
    if not first_line:
        return None
    return first_line.split('\t')[0]
150 |
151 |
# except:
152 |
# st.error("Term occurrence is too low - please try another term")
153 |
# st.stop()
154 |
155 |
156 |
table = model.wv.most_similar_cosmul(query, topn=10000)
157 |
table = (pd.DataFrame(table))
158 | = 'Rank'
159 |
table.columns = ['Word', 'SIMILARITY']
160 |
161 |
pd.set_option('display.max_rows', None)
162 |
table2 = table.copy()
163 |
164 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
165 |
# f"<span style='color:red; font-style: italic;'>words</span> contextually "
166 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
167 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
168 |
# unsafe_allow_html=True)
169 |
170 |
# Set the max number of words to display
171 |
value_word = min(100, len(table2))
172 |
173 |
174 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
175 |
f"</span>words contextually and semantically similar to "
176 |
f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
177 |
f"Click on the squares to expand and also the PubMed and Wikipedia links for more word information</span></p></b>",
178 |
179 |
180 |
short_table = table2.head(value_word).round(2)
181 |
short_table.index += 1
182 |
short_table.index = (1 / short_table.index) * 10
183 |
sizes = short_table.index.tolist()
184 |
185 |
short_table.set_index('Word', inplace=True)
186 |
table2["SIMILARITY"] = 'Similarity Score ' + table2.head(value_word)["SIMILARITY"].round(2).astype(str)
187 |
rank_num = list(short_table.index.tolist())
188 |
189 |
df = short_table
190 |
191 |
df['text'] = short_table.index
192 |
df['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
193 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
194 |
df['href2'] = [f'' + c for c in short_table.index]
195 |
196 |
df.loc[:, 'database'] = database_name
197 |
198 |
fig = px.treemap(df, path=[short_table.index], values=sizes, custom_data=['href', 'text', 'database', 'href2'],
199 |
200 |
201 |
202 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
203 |
204 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
205 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
206 |
texttemplate="<br><span "
207 |
"style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
208 |
"<a href='%{customdata[0]}'>PubMed"
209 |
"</a><br><br><a href='%{customdata[3]}'>Wikipedia"
210 |
211 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
212 |
213 |
# st.pyplot(fig2)
214 |
st.plotly_chart(fig, use_container_width=True)
215 |
216 |
# st.caption(
217 |
# "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC):")
218 |
# st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
219 |
220 |
csv = table2.head(value_word).to_csv().encode('utf-8')
221 |
st.download_button(label=f"download top {value_word} words (csv)", data=csv,
222 |
file_name=f'{database_name}_words.csv', mime='text/csv')
223 |
224 |
225 |
f"This selection exceeds the number of similar words related to {query} within the {database_name} corpus, please choose a lower number")
226 |
227 |
# st.markdown("---")
228 |
# # st.write(short_table)
229 |
# #
230 |
231 |
# # print()
232 |
# # print("Human genes similar to " + str(query))
233 |
# df1 = table.copy()
234 |
# df2 = pd.read_csv('Human Genes.csv')
235 |
# m = df1.Word.isin(df2.symbol)
236 |
# df1 = df1[m]
237 |
# df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
238 |
# df1["Human Gene"] = df1["Human Gene"].str.upper()
239 |
# # print(df1.head(50))
240 |
# # print()
241 |
# # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
242 |
# # time.sleep(2)
243 |
# # Create the slider with increments of 5 up to 100
244 |
245 |
# # Set the maximum number of genes to display up to 100
246 |
# value_gene = min(len(df1), 100)
247 |
248 |
# if value_gene > 0:
249 |
# # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Treemap visualization of "
250 |
# # f"<span style='color:red; font-style: italic;'>genes</span> contextually "
251 |
# # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
252 |
# # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
253 |
# # unsafe_allow_html=True)
254 |
255 |
# st.markdown(
256 |
# f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
257 |
# f"</span>genes contextually and semantically similar to "
258 |
# f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. "
259 |
# f"Click on the squares to expand and also the Pubmed and GeneCard links for more gene information</span></p></b>",
260 |
# unsafe_allow_html=True)
261 |
262 |
# df10 = df1.head(value_gene).copy()
263 |
# df10.index = (1 / df10.index) * 100000
264 |
# sizes = df10.index.tolist()
265 |
# df10.set_index('Human Gene', inplace=True)
266 |
267 |
# df3 = df1.copy()
268 |
# df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value_gene)["SIMILARITY"].round(2).astype(str)
269 |
# df3.reset_index(inplace=True)
270 |
# df3 = df3.rename(columns={'Human Gene': 'symbol2'})
271 |
# # Use df.query to get a subset of df1 based on ids in df2
272 |
# subset = df3.head(value_gene).query('symbol2 in @df2.symbol2')
273 |
# # Use merge to join the two DataFrames on id
274 |
# result = pd.merge(subset, df2, on='symbol2')
275 |
# # Show the result
276 |
# # print(result)
277 |
# # label = df10.index.tolist()
278 |
# # df2 = df10
279 |
# # print(df2)
280 |
# try:
281 |
# # Define the `text` column for labels and `href` column for links
282 |
# df10['text'] = df10.index
283 |
# df10['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
284 |
# '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10['text']]
285 |
# df10['href2'] = [f'' + c for c in df10['text']]
286 |
287 |
# df10['name'] = [c for c in result['Approved name']]
288 |
# assert isinstance(df10, object)
289 |
# df10.loc[:, 'database'] = database_name
290 |
291 |
# # print(df['name'])
292 |
293 |
# # Create the treemap using `px.treemap`
294 |
# fig = px.treemap(df10, path=[df10['text']], values=sizes,
295 |
# custom_data=['href', 'name', 'database', 'href2', 'text'],
296 |
# hover_name=(df3.head(value_gene)['SIMILARITY']))
297 |
298 |
# fig.update(layout_coloraxis_showscale=False)
299 |
# fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
300 |
# fig.update_annotations(visible=False)
301 |
# fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
302 |
# hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
303 |
# texttemplate="<br><span style='font-family: Arial; font-size: 20px;'>%{customdata[4]}<br><br>"
304 |
# "%{customdata[1]}<br><br>"
305 |
# "<a href='%{customdata[0]}'>PubMed"
306 |
# "</a><br><br><a href='%{customdata[3]}'>GeneCard"
307 |
# "</span></a>")
308 |
# fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
309 |
# # # display the treemap in Streamlit
310 |
# # with treemap2:
311 |
312 |
# # st.pyplot(fig2)
313 |
# st.plotly_chart(fig, use_container_width=True)
314 |
315 |
# st.caption(
316 |
# "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC):")
317 |
# st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
318 |
# st.caption("Gene information provided by GeneCards:")
319 |
320 |
# csv = df1.head(value_gene).to_csv().encode('utf-8')
321 |
# st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
322 |
# file_name=f'{database_name}_genes.csv', mime='text/csv')
323 |
324 |
325 |
# except:
326 |
# st.warning(f"No similar genes related to {query} within the {database_name} corpus were found.")
327 |
328 |
329 |
330 |
df1 = table.copy()
331 |
df2 = pd.read_csv('Human Genes.csv')
332 |
m = df1.Word.isin(df2.symbol)
333 |
df1 = df1[m]
334 |
df1.rename(columns={'Word': 'Genes'}, inplace=True)
335 |
df_len = len(df1)
336 |
337 |
338 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
339 |
# f"<span style='color:red; font-style: italic;'>proteins</span> contextually "
340 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
341 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
342 |
# unsafe_allow_html=True)
343 |
344 |
# Set the number of genes to display (at most 100)
345 |
value_gene = min(df_len, 100)
346 |
347 |
348 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
349 |
f"</span>human genes contextually and semantically similar to "
350 |
f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name} </span>corpus. Click on the squares to expand and also the Pubmed and GeneCard links for more gene information</span></p></b>",
351 |
352 |
353 |
df11 = df1.head(value_gene).copy()
354 |
355 |
df11.index = (1 / df11.index) * 10000
356 |
sizes = df11.index.tolist()
357 |
358 |
df11.set_index('Genes', inplace=True)
359 |
360 |
df4 = df1.copy()
361 |
# print(df4.head(10))
362 |
df4["SIMILARITY"] = 'Similarity Score ' + df4.head(value_gene)["SIMILARITY"].round(2).astype(str)
363 |
364 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
365 |
# print(df4)
366 |
# # Use df.query to get a subset of df1 based on ids in df2
367 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
368 |
# # Use merge to join the two DataFrames on id
369 |
# result = pd.merge(subset, df2b, on='symbol2')
370 |
# print(result)
371 |
if value_gene <= df_len:
372 |
# Define the `text` column for labels and `href` column for links
373 |
df11['text'] = df11.index
374 |
df11['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
375 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df11['text']]
376 |
df11['href2'] = [f'' + c for c in df11['text']]
377 |
assert isinstance(df11, object)
378 |
df11['database'] = database_name
379 |
380 |
# df11['name'] = [c for c in result['Approved name']]
381 |
382 |
# Create the treemap using `px.treemap`
383 |
fig = px.treemap(df11, path=[df11['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
384 |
385 |
386 |
387 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
388 |
389 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
390 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
391 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
392 |
"<a href='%{customdata[0]}'>PubMed"
393 |
"</a><br><br><a href='%{customdata[2]}'>GeneCard"
394 |
395 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightPink"])
396 |
# # display the treemap in Streamlit
397 |
# with treemap2:
398 |
399 |
# st.pyplot(fig2)
400 |
st.plotly_chart(fig, use_container_width=True)
401 |
402 |
# st.caption(
403 |
# "Gene designation and database provided by KEGG homo sapien gene list:")
404 |
# st.caption("Gene information provided by GeneCards:")
405 |
st.caption("Human gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC):")
406 |
st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
407 |
st.caption("Gene information provided by GeneCards:")
408 |
409 |
csv = df1.head(value_gene).to_csv().encode('utf-8')
410 |
st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
411 |
file_name=f'{database_name}_genes.csv', mime='text/csv')
412 |
413 |
414 |
415 |
416 |
f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
417 |
418 |
# print()
419 |
# print("Human genes similar to " + str(query))
420 |
df1 = table.copy()
421 |
df2 = pd.read_csv('kegg_drug_list_lowercase.csv')
422 |
m = df1.Word.isin(df2.drugs)
423 |
df1 = df1[m]
424 |
df1.rename(columns={'Word': 'Drugs'}, inplace=True)
425 |
df_len = len(df1)
426 |
# print(len(df1))
427 |
# df1["Human Gene"] = df1["Human Gene"].str.upper()
428 |
# print(df1.head(50))
429 |
# print()
430 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
431 |
# time.sleep(2)
432 |
# Create the slider with increments of 5 up to 100
433 |
434 |
# No slider: set value_drug to the smaller of the number of rows in the dataframe and 100
435 |
value_drug = min(df1.shape[0], 100)
436 |
437 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
438 |
# f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
439 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
440 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
441 |
# unsafe_allow_html=True)
442 |
443 |
444 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_drug} "
445 |
f"</span>Drugs contextually and semantically similar to "
446 |
f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
447 |
448 |
449 |
df13 = df1.head(value_drug).copy()
450 |
451 |
df13.index = (1 / df13.index) * 10000
452 |
sizes = df13.index.tolist()
453 |
454 |
df13.set_index('Drugs', inplace=True)
455 |
456 |
df6 = df1.copy()
457 |
# print(df4.head(10))
458 |
df6["SIMILARITY"] = 'Similarity Score ' + df6.head(value_drug)["SIMILARITY"].round(2).astype(str)
459 |
460 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
461 |
# print(df4)
462 |
# # Use df.query to get a subset of df1 based on ids in df2
463 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
464 |
# # Use merge to join the two DataFrames on id
465 |
# result = pd.merge(subset, df2b, on='symbol2')
466 |
# print(result)
467 |
if value_drug <= df_len:
468 |
# Define the `text` column for labels and `href` column for links
469 |
# Reset the index
470 |
471 |
472 |
# Replace hyphens with spaces in the 'text' column
473 |
df13['Drugs'] = df13['Drugs'].str.replace('-', ' ')
474 |
475 |
# Set the 'text' column back as the index
476 |
df13.set_index('Drugs', inplace=True)
477 |
df13['text'] = df13.index
478 |
df13['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
479 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df13['text']]
480 |
df13['href2'] = [f'' + c for c in df13['text']]
481 |
assert isinstance(df13, object)
482 |
df13['database'] = database_name
483 |
484 |
# df11['name'] = [c for c in result['Approved name']]
485 |
486 |
# Create the treemap using `px.treemap`
487 |
fig = px.treemap(df13, path=[df13['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
488 |
489 |
490 |
491 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
492 |
493 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
494 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
495 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
496 |
"<a href='%{customdata[0]}'>PubMed"
497 |
"</a><br><br><a href='%{customdata[2]}'>Wikipedia"
498 |
499 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["Thistle"])
500 |
# # display the treemap in Streamlit
501 |
# with treemap2:
502 |
503 |
# st.pyplot(fig2)
504 |
st.plotly_chart(fig, use_container_width=True)
505 |
506 |
507 |
"Drug designation and database provided by KEGG:")
508 |
509 |
csv = df1.head(value_drug).to_csv().encode('utf-8')
510 |
st.download_button(label=f"download top {value_drug} drugs (csv)", data=csv,
511 |
file_name=f'{database_name}_drugs.csv', mime='text/csv')
512 |
513 |
514 |
515 |
516 |
f"This selection exceeds the number of similar drugs related to {query} within the {database_name} corpus, please choose a lower number")
517 |
518 |
519 |
# st.markdown("---")
520 |
# # print()
521 |
# # print("Human genes similar to " + str(query))
522 |
# df1 = table.copy()
523 |
# df2 = pd.read_csv('diseasesKegg.csv')
524 |
# m = df1.Word.isin(df2.disease)
525 |
# df1 = df1[m]
526 |
# df1.rename(columns={'Word': 'Disease'}, inplace=True)
527 |
# df_len = len(df1)
528 |
# # print(len(df1))
529 |
# # df1["Human Gene"] = df1["Human Gene"].str.upper()
530 |
# # print(df1.head(50))
531 |
# # print()
532 |
# # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
533 |
# # time.sleep(2)
534 |
# # Create the slider with increments of 5 up to 100
535 |
536 |
# # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
537 |
# value_disease = min(df1.shape[0], 100)
538 |
539 |
# # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
540 |
# # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
541 |
# # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
542 |
# # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
543 |
# # unsafe_allow_html=True)
544 |
545 |
# st.markdown(
546 |
# f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_disease} "
547 |
# f"</span>Diseases contextually and semantically similar to "
548 |
# f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
549 |
# unsafe_allow_html=True)
550 |
551 |
# df14 = df1.head(value_disease).copy()
552 |
553 |
# df14.index = (1 / df14.index) * 10000
554 |
# sizes = df14.index.tolist()
555 |
556 |
# df14.set_index('Disease', inplace=True)
557 |
558 |
# df7 = df1.copy()
559 |
# # print(df4.head(10))
560 |
# df7["SIMILARITY"] = 'Similarity Score ' + df7.head(value_disease)["SIMILARITY"].round(2).astype(str)
561 |
# df7.reset_index(inplace=True)
562 |
# # df4 = df4.rename(columns={'Protein': 'symbol2'})
563 |
# # print(df4)
564 |
# # # Use df.query to get a subset of df1 based on ids in df2
565 |
# # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
566 |
# # # Use merge to join the two DataFrames on id
567 |
# # result = pd.merge(subset, df2b, on='symbol2')
568 |
# # print(result)
569 |
# if value_disease <= df_len:
570 |
# # Define the `text` column for labels and `href` column for links
571 |
# # Reset the index
572 |
# df14.reset_index(inplace=True)
573 |
574 |
# # Replace hyphens with spaces in the 'text' column
575 |
# df14['Disease'] = df14['Disease'].str.replace('-', ' ')
576 |
577 |
# # Set the 'text' column back as the index
578 |
# df14.set_index('Disease', inplace=True)
579 |
# df14['text'] = df14.index
580 |
# df14['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
581 |
# '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df14['text']]
582 |
# df14['href2'] = [f'' + c for c in df14['text']]
583 |
# assert isinstance(df14, object)
584 |
# df14['database'] = database_name
585 |
586 |
# # df11['name'] = [c for c in result['Approved name']]
587 |
588 |
# # Create the treemap using `px.treemap`
589 |
# fig = px.treemap(df14, path=[df14['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
590 |
# hover_name=(df7.head(value_disease)['SIMILARITY']))
591 |
592 |
# fig.update(layout_coloraxis_showscale=False)
593 |
# fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
594 |
# fig.update_annotations(visible=False)
595 |
# fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
596 |
# hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
597 |
# texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
598 |
# "<a href='%{customdata[0]}'>PubMed"
599 |
# "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
600 |
# "</span></a>")
601 |
# fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["PaleGoldenRod"])
602 |
# # # display the treemap in Streamlit
603 |
# # with treemap2:
604 |
605 |
# # st.pyplot(fig2)
606 |
# st.plotly_chart(fig, use_container_width=True)
607 |
608 |
# st.caption("Disease designation and database provided by KEGG:")
609 |
610 |
# csv = df1.head(value_disease).to_csv().encode('utf-8')
611 |
# st.download_button(label=f"download top {value_disease} diseases (csv)", data=csv,
612 |
# file_name=f'{database_name}_disease.csv', mime='text/csv')
613 |
614 |
615 |
# else:
616 |
# st.warning(
617 |
# f"This selection exceeds the number of similar diseases related to {query} within the {database_name} corpus, please choose a lower number")
618 |
# st.markdown("---")
619 |
620 |
# st.markdown("---")
621 |
# # print()
622 |
# # print("Human genes similar to " + str(query))
623 |
# df1 = table.copy()
624 |
# df2 = pd.read_csv('pathwaysKegg.csv')
625 |
# m = df1.Word.isin(df2.pathway)
626 |
# df1 = df1[m]
627 |
# df1.rename(columns={'Word': 'Pathway'}, inplace=True)
628 |
# df_len = len(df1)
629 |
# # print(len(df1))
630 |
# # df1["Human Gene"] = df1["Human Gene"].str.upper()
631 |
# # print(df1.head(50))
632 |
# # print()
633 |
# # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
634 |
# # time.sleep(2)
635 |
# # Create the slider with increments of 5 up to 100
636 |
637 |
# # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
638 |
# value_pathway = min(df1.shape[0], 100)
639 |
640 |
# # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
641 |
# # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
642 |
# # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
643 |
# # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
644 |
# # unsafe_allow_html=True)
645 |
646 |
# st.markdown(
647 |
# f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_pathway} "
648 |
# f"</span>Pathways contextually and semantically similar to "
649 |
# f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
650 |
# unsafe_allow_html=True)
651 |
652 |
# df16 = df1.head(value_pathway).copy()
653 |
654 |
# df16.index = (1 / df16.index) * 10000
655 |
# sizes = df16.index.tolist()
656 |
657 |
# df16.set_index('Pathway', inplace=True)
658 |
659 |
# df9 = df1.copy()
660 |
# # print(df4.head(10))
661 |
# df9["SIMILARITY"] = 'Similarity Score ' + df9.head(value_pathway)["SIMILARITY"].round(2).astype(str)
662 |
# df9.reset_index(inplace=True)
663 |
# # df4 = df4.rename(columns={'Protein': 'symbol2'})
664 |
# # print(df4)
665 |
# # # Use df.query to get a subset of df1 based on ids in df2
666 |
# # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
667 |
# # # Use merge to join the two DataFrames on id
668 |
# # result = pd.merge(subset, df2b, on='symbol2')
669 |
# # print(result)
670 |
# if value_pathway <= df_len:
671 |
# # Define the `text` column for labels and `href` column for links
672 |
# # Reset the index
673 |
# df16.reset_index(inplace=True)
674 |
675 |
# # Replace hyphens with spaces in the 'text' column
676 |
# df16['Pathway'] = df16['Pathway'].str.replace('-', ' ')
677 |
678 |
# # Set the 'text' column back as the index
679 |
# df16.set_index('Pathway', inplace=True)
680 |
# df16['text'] = df16.index
681 |
# df16['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
682 |
# '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df16['text']]
683 |
# df16['href2'] = [f'' + c for c in df16['text']]
684 |
# assert isinstance(df16, object)
685 |
# df16['database'] = database_name
686 |
687 |
# # df11['name'] = [c for c in result['Approved name']]
688 |
689 |
# # Create the treemap using `px.treemap`
690 |
# fig = px.treemap(df16, path=[df16['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
691 |
# hover_name=(df9.head(value_pathway)['SIMILARITY']))
692 |
693 |
# fig.update(layout_coloraxis_showscale=False)
694 |
# fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
695 |
# fig.update_annotations(visible=False)
696 |
# fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
697 |
# hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
698 |
# texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
699 |
# "<a href='%{customdata[0]}'>PubMed"
700 |
# "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
701 |
# "</span></a>")
702 |
# fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["FloralWhite"])
703 |
# # # display the treemap in Streamlit
704 |
# # with treemap2:
705 |
706 |
# # st.pyplot(fig2)
707 |
# st.plotly_chart(fig, use_container_width=True)
708 |
709 |
# st.caption("Pathway designation and database provided by KEGG:")
710 |
711 |
# csv = df1.head(value_pathway).to_csv().encode('utf-8')
712 |
# st.download_button(label=f"download top {value_pathway} pathways (csv)", data=csv,
713 |
# file_name=f'{database_name}_pathways.csv', mime='text/csv')
714 |
715 |
716 |
# else:
717 |
# st.warning(
718 |
# f"This selection exceeds the number of similar pathways related to {query} within the {database_name} corpus, please choose a lower number")
719 |
# st.markdown("---")
720 |
721 |
722 |
# ---------------------------------------------------------------------------
# Phytochemicals similar to the query term.
#
# Filters the similarity `table` down to words listed in phytochemicals.csv,
# draws the best-ranked ones (at most 100) as a clickable treemap and offers
# the same rows as a CSV download.
#
# NOTE(review): this copy of the script arrived with indentation stripped and
# several lines elided (st.markdown(...) opener/closer, reset_index calls,
# hover_name=, the closing "</span></a>" fragment, else:/st.warning(). They
# are restored here from the structurally identical commented-out
# disease/pathway sections -- confirm against the canonical file.
# ---------------------------------------------------------------------------
df2 = pd.read_csv('phytochemicals.csv')
m = table.Word.isin(df2.phyto)
# Boolean indexing already yields a new frame, so rename without inplace=True
# to avoid mutating a filtered slice.
df1 = table[m].rename(columns={'Word': 'Phytochemical'})
df_len = len(df1)

# Show every match, capped at 100 tiles.
value_phyto = min(df1.shape[0], 100)

st.markdown(
    f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_phyto} "
    f"</span>Phytochemicals contextually and semantically similar to "
    f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
    f"Click on the squares to expand and also the Pubmed and Wikipedia links for more compound information</span></p></b>",
    unsafe_allow_html=True)

df15 = df1.head(value_phyto).copy()

# Tile size is inversely proportional to the index label carried over from
# `table`, so better-ranked words get larger squares.
# NOTE(review): an index label of 0 would give an infinite size -- confirm the
# rank index of `table` is 1-based.
df15.index = (1 / df15.index) * 10000
sizes = df15.index.tolist()

df15.set_index('Phytochemical', inplace=True)

# Hover text: similarity score rounded to two decimals.
df8 = df1.copy()
df8["SIMILARITY"] = 'Similarity Score ' + df8.head(value_phyto)["SIMILARITY"].round(2).astype(str)
df8.reset_index(inplace=True)

# NOTE(review): value_phyto is min(len(df1), 100), so it can never exceed
# df_len and the else branch below is unreachable as written.
if value_phyto <= df_len:
    # Temporarily turn the name back into a column so hyphens can be replaced
    # with spaces for display, then index by the cleaned name again.
    df15.reset_index(inplace=True)
    df15['Phytochemical'] = df15['Phytochemical'].str.replace('-', ' ')
    df15.set_index('Phytochemical', inplace=True)

    # `text` is the tile label; `href`/`href2` feed the PubMed/Wikipedia links.
    # NOTE(review): the base URLs of both links appear to be missing in this
    # view of the file -- verify before relying on them.
    df15['text'] = df15.index
    df15['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
                    '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df15['text']]
    df15['href2'] = [f'' + c for c in df15['text']]
    df15['database'] = database_name

    # Build the treemap; custom_data order must match the %{customdata[i]}
    # placeholders used in texttemplate below.
    fig = px.treemap(df15, path=[df15['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
                     hover_name=(df8.head(value_phyto)['SIMILARITY']))

    fig.update(layout_coloraxis_showscale=False)
    fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
    fig.update_annotations(visible=False)
    fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                      hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                      texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                   "<a href='%{customdata[0]}'>PubMed"
                                   "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
                                   "</span></a>")
    fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightSeaGreen"])
    st.plotly_chart(fig, use_container_width=True)

    st.caption("Phytochemical designation and database provided by PhytoHub:")

    csv = df1.head(value_phyto).to_csv().encode('utf-8')
    st.download_button(label=f"download top {value_phyto} phytochemicals (csv)", data=csv,
                       file_name=f'{database_name}_phytochemicals.csv', mime='text/csv')
else:
    st.warning(
        f"This selection exceeds the number of similar phytochemicals related to {query} within the {database_name} corpus, please choose a lower number")
821 |
822 |
823 |
# ---------------------------------------------------------------------------
# KEGG compounds similar to the query term.
#
# Filters the similarity `table` down to words listed in
# kegg_compounds_lowercase.csv, draws the best-ranked ones (at most 100) as a
# clickable treemap with PubMed / Wikipedia / KEGG links and offers the same
# rows as a CSV download.
#
# NOTE(review): this copy of the script arrived with indentation stripped and
# several lines elided (st.markdown(...) opener/closer, reset_index calls,
# hover_name=, the closing "</span></a>" fragment, else:/st.warning(). They
# are restored here from the structurally identical commented-out
# disease/pathway sections -- confirm against the canonical file.
# ---------------------------------------------------------------------------
df2 = pd.read_csv('kegg_compounds_lowercase.csv')
m = table.Word.isin(df2.compound)
# Boolean indexing already yields a new frame, so rename without inplace=True
# to avoid mutating a filtered slice.
df1 = table[m].rename(columns={'Word': 'Compounds'})
df_len = len(df1)

# Show every match, capped at 100 tiles.
value_compound = min(df1.shape[0], 100)

st.markdown(
    f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_compound} "
    f"</span>Compounds contextually and semantically similar to "
    f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
    f"Click on the squares to expand and the Pubmed, Wikipedia, and KEGG links for more compound information (may take time to load)</span></p></b>",
    unsafe_allow_html=True)

df12 = df1.head(value_compound).copy()

# Tile size is inversely proportional to the index label carried over from
# `table`, so better-ranked words get larger squares.
# NOTE(review): an index label of 0 would give an infinite size -- confirm the
# rank index of `table` is 1-based.
df12.index = (1 / df12.index) * 10000
sizes = df12.index.tolist()

df12.set_index('Compounds', inplace=True)

# Hover text: similarity score rounded to two decimals.
df5 = df1.copy()
df5["SIMILARITY"] = 'Similarity Score ' + df5.head(value_compound)["SIMILARITY"].round(2).astype(str)
df5.reset_index(inplace=True)

# NOTE(review): value_compound is min(len(df1), 100), so it can never exceed
# df_len and the else branch below is unreachable as written.
if value_compound <= df_len:
    # Temporarily turn the name back into a column so hyphens can be replaced
    # with spaces for display, then index by the cleaned name again.
    df12.reset_index(inplace=True)
    df12['Compounds'] = df12['Compounds'].str.replace('-', ' ')
    df12.set_index('Compounds', inplace=True)

    # `text` is the tile label; `href`/`href2`/`href3` feed the PubMed,
    # Wikipedia and KEGG links respectively.
    # NOTE(review): the base URLs of these links appear to be missing in this
    # view of the file -- verify before relying on them.
    df12['text'] = df12.index
    df12['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
                    '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df12['text']]
    df12['href2'] = [f'' + c for c in df12['text']]
    # get_compound_ids is defined elsewhere in this file; presumably it maps
    # compound names to KEGG identifiers -- confirm against its definition.
    df12['href3'] = [f'{compound_id}' for compound_id in get_compound_ids(df12['text'])]
    df12['database'] = database_name

    # Build the treemap; custom_data order must match the %{customdata[i]}
    # placeholders used in texttemplate below.
    fig = px.treemap(df12, path=[df12['text']], values=sizes,
                     custom_data=['href', 'database', 'href2', 'text', 'href3'],
                     hover_name=(df5.head(value_compound)['SIMILARITY']))

    fig.update(layout_coloraxis_showscale=False)
    fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
    fig.update_annotations(visible=False)
    fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                      hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                      texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                   "<a href='%{customdata[0]}'>PubMed"
                                   "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
                                   "</a><br><br><a href='%{customdata[4]}'>KEGG Compound Page"
                                   "</span></a>")

    fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightYellow"])
    st.plotly_chart(fig, use_container_width=True)

    st.caption("Compound designation and database provided by KEGG:")

    csv = df1.head(value_compound).to_csv().encode('utf-8')
    st.download_button(label=f"download top {value_compound} compounds (csv)", data=csv,
                       file_name=f'{database_name}_compounds.csv', mime='text/csv')
else:
    # Message previously said "proteins" (copied from another section).
    st.warning(
        f"This selection exceeds the number of similar compounds related to {query} within the {database_name} corpus, please choose a lower number")
926 |
927 |
928 |
929 |
def save_comment(comment):
930 |
with open('comments.txt', 'a') as f:
931 |
932 |
933 |
934 |
def save_comment_threaded(comment):
935 |
t = threading.Thread(target=save_comment, args=(comment,))
936 |
937 |
938 |
939 |
st.title("Abstractalytics Web App")
940 |
st.write("We appreciate your feedback!")
941 |
942 |
user_comment = st.text_area("Please send us your anonymous remarks/suggestions about the Abstractalytics Web App: "
943 |
"(app will pause while we save your comments)")
944 |
945 |
if st.button("Submit"):
946 |
if user_comment:
947 |
948 |
st.success("Your comment has been saved. Thank you for your feedback!")
949 |
950 |
st.warning("Please enter a comment before submitting.")
951 |
952 |
953 |
954 |
st.subheader("Cancer-related videos")
955 |
if query:
956 |
idlist = []
957 |
search_keyword = {query}
958 |
html = urllib.request.urlopen("")
959 |
html2 = urllib.request.urlopen("")
979 |
980 |
c1, c2, c3 = st.columns(3)
981 |
982 |
with c1:
983 |
+"" + video_ids[0])
984 |
with c2:
985 |
+"" + video_ids[1])
986 |
with c3:
987 |
+"" + video_ids[2])
988 |
989 |
990 |
# else:
991 |
# st.error("The password you entered is incorrect.")
992 |
993 |
994 |
995 |
996 |
997 |
998 |
999 |
1000 |
1001 |
1002 |
1003 |
1004 |
1005 |
1006 |
1007 |
1008 |