jfataphd commited on
Commit
6ce67d2
·
1 Parent(s): 6337933

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +848 -272
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import streamlit as st
2
  import time
 
3
  import json
4
  from gensim.models import Word2Vec
5
  import pandas as pd
 
6
  import matplotlib.pyplot as plt
7
  import squarify
8
  import numpy as np
@@ -12,12 +14,13 @@ import random
12
  import plotly.express as px
13
 
14
  st.set_page_config(
15
- page_title="FATA4 Science",
16
  page_icon=":microscope:",
17
  layout="wide", #centered
18
  initial_sidebar_state="auto",
19
  menu_items={
20
- 'About': "FATA4 Science is a Natural Language Processing (NLP) that ...."
 
21
  }
22
  )
23
 
@@ -44,38 +47,70 @@ st.markdown("""
44
  </style>
45
  """, unsafe_allow_html=True)
46
 
47
- opt=st.sidebar.radio("Select a PubMed Corpus", options=('Clotting corpus', 'Neuroblastoma corpus'))
48
- if opt == "Clotting corpus":
49
- model_used = ("pubmed_model_clotting")
50
- num_abstracts = 45493
51
- database_name = "Clotting"
52
- if opt == "Neuroblastoma corpus":
53
- model_used = ("pubmed_model_neuroblastoma")
54
- num_abstracts = 29032
55
- database_name = "Neuroblastoma"
56
- # if opt == "Breast Cancer corpus":
57
- # model_used = ("pubmed_model_breast_cancer")
58
- # num_abstracts = 290320
59
- # database_name = "Breast_cancer"
60
- # if opt == "Mammary gland corpus":
61
- # model_used = ("pubmed_model_mammary_gland")
62
- # num_abstracts = 79032
63
- # database_name = "Mammary_gland"
64
-
65
- st.header(":red[*F*]ast :red[*A*]cting :red[*T*]ext :red[*A*]nalysis (:red[*FATA*]) 4 Science")
66
-
67
- st.subheader("Uncovering knowledge through Natural Language Processing (NLP)")
68
  st.markdown("---")
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  st.header(f":blue[{database_name} Pubmed corpus.]")
71
  text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
72
  query = text_input_value
73
  query = query.lower()
74
- query = re.sub("[,.?!&*;: ]", "", query)
75
- matches = [" "]
76
- if any([x in query for x in matches]):
77
- st.write("Please only enter one term or a term without spaces")
78
- # query = input ("Enter your keyword(s):")
 
79
  if query:
80
  bar = st.progress(0)
81
  time.sleep(.05)
@@ -86,313 +121,839 @@ if query:
86
  time.sleep(.1)
87
 
88
  # try:
89
- model = Word2Vec.load(model_used) # you can continue training with the loaded model!
90
  words = list(model.wv.key_to_index)
91
  X = model.wv[model.wv.key_to_index]
 
92
  model2 = model.wv[query]
 
93
  df = pd.DataFrame(X)
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # except:
96
  # st.error("Term occurrence is too low - please try another term")
97
  # st.stop()
98
  st.markdown("---")
99
- # def findRelationships(query, df):
100
-
101
 
102
  table = model.wv.most_similar_cosmul(query, topn=10000)
103
  table = (pd.DataFrame(table))
104
  table.index.name = 'Rank'
105
  table.columns = ['Word', 'SIMILARITY']
106
 
107
- # print()
108
- # print("Similarity to " + str(query))
109
  pd.set_option('display.max_rows', None)
110
  table2 = table.copy()
111
- # print(table.head(50))
112
- # table.head(10).to_csv("clotting_sim1.csv", index=True)
113
- # short_table = table.head(50)
114
- # print(table)
115
 
116
- # Create the slider with increments of 5 up to 100
 
 
 
 
 
 
 
117
 
118
  st.markdown(
119
- f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap with the slider below to visualize "
120
- f"<span style='color:red; font-style: italic;'>words</span> contextually "
121
- f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
122
- f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
123
  unsafe_allow_html=True)
124
- value_word = st.slider("Words", 0, 100, step=5)
125
- if value_word > 0:
126
- # st.subheader(f"Top {value} genes closely related to {query}: "
127
- # f"Click on the Pubmed and NCBI links for more gene information")
128
-
129
- st.markdown(
130
- f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
131
- f"</span>words similar to "
132
- f"<span style='color:red; font-style: italic;'>{query}:</span> Click on the squares to expand and the Wikipaedia links for more word information</span></p></b>",
133
- unsafe_allow_html=True)
134
-
135
-
136
- # calculate the sizes of the squares in the treemap
137
- short_table = table2.head(value_word).round(2)
138
- short_table.index += 1
139
- short_table.index = (1 / short_table.index)*10
140
- sizes = short_table.index.tolist()
141
-
142
-
143
- short_table.set_index('Word', inplace=True)
144
- # label = short_table.index.tolist()
145
- # print(short_table.index)
146
- table2["SIMILARITY"] = 'Similarity Score ' + table2.head(10)["SIMILARITY"].round(2).astype(str)
147
- rank_num = list(short_table.index.tolist())
148
- # avg_size = sum(sizes) / len(short_table.index)
149
- df = short_table
150
- try:
151
- # Define the `text` column for labels and `href` column for links
152
- df['text'] = short_table.index
153
-
154
- df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
155
- '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
156
- df['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in short_table.index]
157
-
158
- df.loc[:,'database'] = database_name
159
-
160
-
161
- # print(sizes)
162
- # '{0} in {1}'.format(unicode(self.author, 'utf-8'), unicode(self.publication, 'utf-8'))
163
- # Create the treemap using `px.treemap`
164
- fig = px.treemap(df, path=[short_table.index], values=sizes, custom_data=['href', 'text', 'database', 'href2'],
165
  hover_name=(table2.head(value_word)['SIMILARITY']))
166
 
167
- fig.update(layout_coloraxis_showscale=False)
168
- fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
169
- fig.update_annotations(visible=False)
170
- fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
171
- hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
172
- texttemplate="</b><br><span "
173
- "style='font-family: Arial; font-size: 15px;'>%{customdata[1]}<br>"
174
- "<a href='%{customdata[0]}'>PubMed"
175
- "</a><br><a href='%{customdata[3]}'>Wikipedia"
176
- "</span></a>")
177
- fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- # st.pyplot(fig2)
180
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
- # st.caption(
183
- # "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
184
- # st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
185
 
186
- csv = table2.head(value_word).to_csv().encode('utf-8')
187
- st.download_button(label=f"download top {value_word} words (csv)", data=csv, file_name=f'{database_name}_words.csv',
188
- mime='text/csv')
189
- except:
190
- st.warning(
191
- f"This selection exceeds the number of similar words related to {query} within the {database_name} corpus, please choose a lower number")
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  st.markdown("---")
194
- # st.write(short_table)
195
  #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
197
  # print()
198
  # print("Human genes similar to " + str(query))
199
  df1 = table.copy()
200
- df2 = pd.read_csv('Human_Genes.csv')
201
- m = df1.Word.isin(df2.symbol)
202
- df1 = df1[m].loc[:,:]
203
- df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
204
- df1["Human Gene"] = df1["Human Gene"].str.upper()
 
 
205
  # print(df1.head(50))
206
  # print()
207
  # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
208
  # time.sleep(2)
209
  # Create the slider with increments of 5 up to 100
210
 
211
- st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap with the slider below to visualize "
212
- f"<span style='color:red; font-style: italic;'>genes</span> contextually "
213
- f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
214
- f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
215
- unsafe_allow_html=True)
216
- value_gene = st.slider("Gene", 0, 100, step=5)
217
- if value_gene > 0:
218
- # st.subheader(f"Top {value} genes closely related to {query}: "
219
- # f"Click on the Pubmed and NCBI links for more gene information")
220
-
221
- st.markdown(
222
- f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
223
- f"</span>genes similar to "
224
- f"<span style='color:red; font-style: italic;'>{query}:</span> Click on the squares to expand and the Pubmed and NCBI links for more gene information</span></p></b>",
225
- unsafe_allow_html=True)
226
-
227
- df10 = df1.head(value_gene).copy()
228
- df10.index = (1 / df10.index)*10000
229
- sizes = df10.index.tolist()
230
- df10.set_index('Human Gene', inplace=True)
231
-
232
- df3 = df1.copy()
233
- df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value_gene)["SIMILARITY"].round(2).astype(str)
234
- df3.reset_index(inplace=True)
235
- df3 = df3.rename(columns={'Human Gene': 'symbol2'})
236
- # Use df.query to get a subset of df1 based on ids in df2
237
- subset = df3.head(value_gene).query('symbol2 in @df2.symbol2')
238
- # Use merge to join the two DataFrames on id
239
- result = pd.merge(subset, df2, on='symbol2')
240
- # Show the result
241
- # print(result)
242
- # label = df10.index.tolist()
243
- # df2 = df10
244
- # print(df2)
245
- try:
246
- # Define the `text` column for labels and `href` column for links
247
- df10['text'] = df10.index
248
- df10['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
249
- '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10['text']]
250
- df10['href2'] = [f'https://www.ncbi.nlm.nih.gov/gene/?term=' + c for c in df10['text']]
251
-
252
- df10['name'] = [c for c in result['Approved name']]
253
- assert isinstance(df10, object)
254
- df10.loc[:,'database'] = database_name
255
-
256
- # print(df['name'])
257
-
258
- # Create the treemap using `px.treemap`
259
- fig = px.treemap(df10, path=[df10['text']], values=sizes,
260
- custom_data=['href', 'name', 'database', 'href2', 'text'], hover_name=(df3.head(value_gene)['SIMILARITY']))
261
-
262
- fig.update(layout_coloraxis_showscale=False)
263
- fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
264
- fig.update_annotations(visible=False)
265
- fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
266
- hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
267
- texttemplate="<b><span style='font-family: Arial; font-size: 20px;'>%{customdata[4]}</span></b><br><span "
268
- "style='font-family: Arial; font-size: 15px;'>%{customdata[1]}<br>"
269
- "<a href='%{customdata[0]}'>PubMed"
270
- "</a><br><a href='%{customdata[3]}'>NCBI"
271
- "</span></a>")
272
- fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
273
- # # display the treemap in Streamlit
274
- # with treemap2:
275
-
276
- # st.pyplot(fig2)
277
- st.plotly_chart(fig, use_container_width=True)
278
-
279
- st.caption("Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
280
- st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
281
-
282
-
283
-
284
- csv = df1.head(value_gene).to_csv().encode('utf-8')
285
- st.download_button(label=f"download top {value_gene} genes (csv)", data=csv, file_name=f'{database_name}_genes.csv',
286
- mime='text/csv')
287
-
288
-
289
- except:
290
- st.warning(
291
- f"This selection exceeds the number of similar genes related to {query} within the {database_name} corpus, please choose a lower number")
292
- st.markdown("---")
293
 
294
- # st.write(short_table)
295
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  # print()
298
  # print("Human genes similar to " + str(query))
299
  df1 = table.copy()
300
- df2 = pd.read_csv('protein.csv')
301
- m = df1.Word.isin(df2.protein)
302
  df1 = df1[m]
303
- df1.rename(columns={'Word': 'Protein'}, inplace=True)
304
- # print(df1)
305
  df_len = len(df1)
306
- # df1["Protein"] = df1["Protein"].str.upper()
307
  # print(df1.head(50))
308
  # print()
309
  # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
310
  # time.sleep(2)
311
  # Create the slider with increments of 5 up to 100
312
 
 
 
 
 
 
 
 
 
 
313
  st.markdown(
314
- f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap with the slider below to visualize "
315
- f"<span style='color:red; font-style: italic;'>proteins</span> contextually "
316
- f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
317
- f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
318
  unsafe_allow_html=True)
319
- value_protein = st.slider("Protein", 0, 100, step=5)
320
- # print(value_protein)
321
- if value_protein > 0:
322
- # st.subheader(f"Top {value} genes closely related to {query}: "
323
- # f"Click on the Pubmed and NCBI links for more gene information")
324
-
325
- st.markdown(
326
- f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_protein} "
327
- f"</span>proteins similar to "
328
- f"<span style='color:red; font-style: italic;'>{query}:</span> Click on the squares to expand and the Pubmed and Wikipedia links for more protein information</span></p></b>",
329
- unsafe_allow_html=True)
330
-
331
- df11 = df1.head(value_protein).copy()
332
-
333
- df11.index = (1 / df11.index) * 10000
334
- sizes = df11.index.tolist()
335
-
336
- df11.set_index('Protein', inplace=True)
337
-
338
- df4 = df1.copy()
339
- # print(df4.head(10))
340
- df4["SIMILARITY"] = 'Similarity Score ' + df4.head(value_protein)["SIMILARITY"].round(2).astype(str)
341
- df4.reset_index(inplace=True)
342
- # df4 = df4.rename(columns={'Protein': 'symbol2'})
343
- # print(df4)
344
- # # Use df.query to get a subset of df1 based on ids in df2
345
- # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
346
- # # Use merge to join the two DataFrames on id
347
- # result = pd.merge(subset, df2b, on='symbol2')
348
- # print(result)
349
- if value_protein <= df_len:
350
- # Define the `text` column for labels and `href` column for links
351
- df11['text'] = df11.index
352
- df11['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
353
- '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df11['text']]
354
- df11['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in df11['text']]
355
- assert isinstance(df11, object)
356
- df11['database'] = database_name
357
-
358
- # df11['name'] = [c for c in result['Approved name']]
359
-
360
- # Create the treemap using `px.treemap`
361
- fig = px.treemap(df11, path=[df11['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
362
- hover_name=(df4.head(value_protein)['SIMILARITY']))
363
-
364
- fig.update(layout_coloraxis_showscale=False)
365
- fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
366
- fig.update_annotations(visible=False)
367
- fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
368
  hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
369
- texttemplate="<b><span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}</span></b><br>"
370
  "<a href='%{customdata[0]}'>PubMed"
371
- "</a><br><a href='%{customdata[2]}'>Wikipedia"
 
372
  "</span></a>")
373
- fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
374
- # # display the treemap in Streamlit
375
- # with treemap2:
376
 
377
- # st.pyplot(fig2)
378
- st.plotly_chart(fig, use_container_width=True)
 
379
 
380
- st.caption(
381
- "Protein designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
382
 
383
- csv = df1.head(value_protein).to_csv().encode('utf-8')
384
- st.download_button(label=f"download top {value_protein} proteins (csv)", data=csv, file_name=f'{database_name}_genes.csv',
385
- mime='text/csv')
386
 
 
 
 
387
 
388
- else:
389
- st.warning(f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
 
 
390
  st.markdown("---")
391
 
392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
  st.subheader("Cancer-related videos")
394
  if query:
395
- idlist=[]
396
  search_keyword = {query}
397
  html = urllib.request.urlopen("https://www.youtube.com/@NCIgov/search?query=cancer")
398
  html2 = urllib.request.urlopen("https://www.youtube.com/@CancerCenter/search?query=cancer")
@@ -418,15 +979,30 @@ if query:
418
 
419
  c1, c2, c3 = st.columns(3)
420
 
421
-
422
  with c1:
423
- st.video("https://www.youtube.com/watch?v=" + video_ids[0])
424
  with c2:
425
- st.video("https://www.youtube.com/watch?v=" + video_ids[1])
426
  with c3:
427
- st.video("https://www.youtube.com/watch?v=" + video_ids[2])
428
  st.markdown("---")
429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
 
431
 
432
 
 
1
  import streamlit as st
2
  import time
3
+ import concurrent.futures
4
  import json
5
  from gensim.models import Word2Vec
6
  import pandas as pd
7
+ import threading
8
  import matplotlib.pyplot as plt
9
  import squarify
10
  import numpy as np
 
14
  import plotly.express as px
15
 
16
  st.set_page_config(
17
+ page_title="Abstractalytics",
18
  page_icon=":microscope:",
19
  layout="wide", #centered
20
  initial_sidebar_state="auto",
21
  menu_items={
22
+ 'About': "Abstractalytics is a Natural Language Processing (NLP) that harnesses Word2Vec to mine"
23
+ " insight from pubmed abstracts. Created by Jimmie E. Fata, PhD"
24
  }
25
  )
26
 
 
47
  </style>
48
  """, unsafe_allow_html=True)
49
 
50
+ st.header(":red[*Abstractalytics*]")
51
+
52
+ st.subheader("*A web app designed to explore :red[*PubMed abstracts*] for deeper understanding and fresh insights, driven "
53
+ "by Natural Language Processing (NLP) techniques.*")
54
+
55
+ def custom_subheader(text, identifier, font_size):
56
+ st.markdown(f"<h3 id='{identifier}' style='font-size: {font_size}px;'>{text}</h3>", unsafe_allow_html=True)
57
+
58
+ custom_subheader("Welcome to our innovative web2vec app designed to unlock the wealth of knowledge and insights hidden "
59
+ "within PubMed abstracts! To begin, simply select a corpus that interests you. Next, enter a single keyword "
60
+ "you wish to explore within the corpus. Abstractalytics powerful Natural Language "
61
+ "Processing (NLP) algorithms will analyze the chosen corpus and present you with a list of top words, "
62
+ "genes, drugs, phytochemicals, and compounds that are contextually and semantically related "
63
+ "to your input. This advanced text-mining technique enables you to explore and understand complex "
64
+ "relationships, uncovering new discoveries and connections in your field of research across a massive "
65
+ "amount of abstracts. Dive in and enjoy the exploration! More oncology-related corpora comming soon.", "unique-id", 18)
66
+
 
 
 
 
67
  st.markdown("---")
68
 
69
+ #Define the correct password
70
+ # CORRECT_PASSWORD = "123"
71
+
72
+ # Define a function to check if the password is correct
73
+ # def authenticate(password):
74
+ # if password == CORRECT_PASSWORD:
75
+ # return True
76
+ # else:
77
+ # return False
78
+ #
79
+ # # Create a Streamlit input field for the password
80
+ # password = st.text_input("Enter password:", type="password")
81
+ #
82
+ # # If the password is correct, show the app content
83
+ # if authenticate(password):
84
+ opt = st.sidebar.radio("Select a PubMed Corpus",
85
+ options=(
86
+ 'Breast Cancer corpus', 'Lung Cancer corpus'))
87
+ # if opt == "Clotting corpus":
88
+ # model_used = ("pubmed_model_clotting")
89
+ # num_abstracts = 45493
90
+ # database_name = "Clotting"
91
+ # if opt == "Neuroblastoma corpus":
92
+ # model_used = ("pubmed_model_neuroblastoma")
93
+ # num_abstracts = 29032
94
+ # database_name = "Neuroblastoma"
95
+ if opt == "Breast Cancer corpus":
96
+ model_used = ("pubmed_model_breast_cancer2")
97
+ num_abstracts = 290320
98
+ database_name = "Breast_cancer"
99
+ if opt == "Lung Cancer corpus":
100
+ model_used = ("lung_cancer_pubmed_model")
101
+ num_abstracts = 210320
102
+ database_name = "Lung_cancer"
103
+
104
  st.header(f":blue[{database_name} Pubmed corpus.]")
105
  text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
106
  query = text_input_value
107
  query = query.lower()
108
+ query = re.sub("[,.?!&*;:]", "", query)
109
+ query = re.sub(" ", "-", query)
110
+ # matches = [" "]
111
+ # if any([x in query for x in matches]):
112
+ # st.write("Please only enter one term or a term without spaces")
113
+ # # query = input ("Enter your keyword(s):")
114
  if query:
115
  bar = st.progress(0)
116
  time.sleep(.05)
 
121
  time.sleep(.1)
122
 
123
  # try:
124
+ model = Word2Vec.load(f"{model_used}") # you can continue training with the loaded model!
125
  words = list(model.wv.key_to_index)
126
  X = model.wv[model.wv.key_to_index]
127
+ # print(model.wv['bfgf'])
128
  model2 = model.wv[query]
129
+ # print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
130
  df = pd.DataFrame(X)
131
 
132
+ def get_compound_ids(compound_names):
133
+ with concurrent.futures.ThreadPoolExecutor() as executor:
134
+ compound_ids = list(executor.map(get_compound_id, compound_names))
135
+ return compound_ids
136
+
137
+
138
+ import requests
139
+
140
+
141
+ def get_compound_id(compound_name):
142
+ url = f"http://rest.kegg.jp/find/compound/{compound_name}"
143
+ response = requests.get(url)
144
+ if response.status_code == 200:
145
+ result = response.text.split('\n')
146
+ if result[0]:
147
+ compound_id = result[0].split('\t')[0]
148
+ return compound_id
149
+ return None
150
+
151
  # except:
152
  # st.error("Term occurrence is too low - please try another term")
153
  # st.stop()
154
  st.markdown("---")
 
 
155
 
156
  table = model.wv.most_similar_cosmul(query, topn=10000)
157
  table = (pd.DataFrame(table))
158
  table.index.name = 'Rank'
159
  table.columns = ['Word', 'SIMILARITY']
160
 
 
 
161
  pd.set_option('display.max_rows', None)
162
  table2 = table.copy()
 
 
 
 
163
 
164
+ # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
165
+ # f"<span style='color:red; font-style: italic;'>words</span> contextually "
166
+ # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
167
+ # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
168
+ # unsafe_allow_html=True)
169
+
170
+ # Set the max number of words to display
171
+ value_word = min(100, len(table2))
172
 
173
  st.markdown(
174
+ f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
175
+ f"</span>words contextually and semantically similar to "
176
+ f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
177
+ f"Click on the squares to expand and also the PubMed and Wikipedia links for more word information</span></p></b>",
178
  unsafe_allow_html=True)
179
+
180
+ short_table = table2.head(value_word).round(2)
181
+ short_table.index += 1
182
+ short_table.index = (1 / short_table.index) * 10
183
+ sizes = short_table.index.tolist()
184
+
185
+ short_table.set_index('Word', inplace=True)
186
+ table2["SIMILARITY"] = 'Similarity Score ' + table2.head(value_word)["SIMILARITY"].round(2).astype(str)
187
+ rank_num = list(short_table.index.tolist())
188
+
189
+ df = short_table
190
+ try:
191
+ df['text'] = short_table.index
192
+ df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
193
+ '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
194
+ df['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in short_table.index]
195
+
196
+ df.loc[:, 'database'] = database_name
197
+
198
+ fig = px.treemap(df, path=[short_table.index], values=sizes, custom_data=['href', 'text', 'database', 'href2'],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  hover_name=(table2.head(value_word)['SIMILARITY']))
200
 
201
+ fig.update(layout_coloraxis_showscale=False)
202
+ fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
203
+ fig.update_annotations(visible=False)
204
+ fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
205
+ hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
206
+ texttemplate="<br><span "
207
+ "style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
208
+ "<a href='%{customdata[0]}'>PubMed"
209
+ "</a><br><br><a href='%{customdata[3]}'>Wikipedia"
210
+ "</span></a>")
211
+ fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
212
+
213
+ # st.pyplot(fig2)
214
+ st.plotly_chart(fig, use_container_width=True)
215
+
216
+ # st.caption(
217
+ # "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
218
+ # st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
219
+
220
+ csv = table2.head(value_word).to_csv().encode('utf-8')
221
+ st.download_button(label=f"download top {value_word} words (csv)", data=csv,
222
+ file_name=f'{database_name}_words.csv', mime='text/csv')
223
+ except:
224
+ st.warning(
225
+ f"This selection exceeds the number of similar words related to {query} within the {database_name} corpus, please choose a lower number")
226
+
227
+ # st.markdown("---")
228
+ # # st.write(short_table)
229
+ # #
230
+ #
231
+ # # print()
232
+ # # print("Human genes similar to " + str(query))
233
+ # df1 = table.copy()
234
+ # df2 = pd.read_csv('Human Genes.csv')
235
+ # m = df1.Word.isin(df2.symbol)
236
+ # df1 = df1[m]
237
+ # df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
238
+ # df1["Human Gene"] = df1["Human Gene"].str.upper()
239
+ # # print(df1.head(50))
240
+ # # print()
241
+ # # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
242
+ # # time.sleep(2)
243
+ # # Create the slider with increments of 5 up to 100
244
+ #
245
+ # # Set the maximum number of genes to display up to 100
246
+ # value_gene = min(len(df1), 100)
247
+ #
248
+ # if value_gene > 0:
249
+ # # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Treemap visualization of "
250
+ # # f"<span style='color:red; font-style: italic;'>genes</span> contextually "
251
+ # # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
252
+ # # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
253
+ # # unsafe_allow_html=True)
254
+ #
255
+ # st.markdown(
256
+ # f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
257
+ # f"</span>genes contextually and semantically similar to "
258
+ # f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. "
259
+ # f"Click on the squares to expand and also the Pubmed and GeneCard links for more gene information</span></p></b>",
260
+ # unsafe_allow_html=True)
261
+ #
262
+ # df10 = df1.head(value_gene).copy()
263
+ # df10.index = (1 / df10.index) * 100000
264
+ # sizes = df10.index.tolist()
265
+ # df10.set_index('Human Gene', inplace=True)
266
+ #
267
+ # df3 = df1.copy()
268
+ # df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value_gene)["SIMILARITY"].round(2).astype(str)
269
+ # df3.reset_index(inplace=True)
270
+ # df3 = df3.rename(columns={'Human Gene': 'symbol2'})
271
+ # # Use df.query to get a subset of df1 based on ids in df2
272
+ # subset = df3.head(value_gene).query('symbol2 in @df2.symbol2')
273
+ # # Use merge to join the two DataFrames on id
274
+ # result = pd.merge(subset, df2, on='symbol2')
275
+ # # Show the result
276
+ # # print(result)
277
+ # # label = df10.index.tolist()
278
+ # # df2 = df10
279
+ # # print(df2)
280
+ # try:
281
+ # # Define the `text` column for labels and `href` column for links
282
+ # df10['text'] = df10.index
283
+ # df10['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
284
+ # '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10['text']]
285
+ # df10['href2'] = [f'https://www.genecards.org/cgi-bin/carddisp.pl?gene=' + c for c in df10['text']]
286
+ #
287
+ # df10['name'] = [c for c in result['Approved name']]
288
+ # assert isinstance(df10, object)
289
+ # df10.loc[:, 'database'] = database_name
290
+ #
291
+ # # print(df['name'])
292
+ #
293
+ # # Create the treemap using `px.treemap`
294
+ # fig = px.treemap(df10, path=[df10['text']], values=sizes,
295
+ # custom_data=['href', 'name', 'database', 'href2', 'text'],
296
+ # hover_name=(df3.head(value_gene)['SIMILARITY']))
297
+ #
298
+ # fig.update(layout_coloraxis_showscale=False)
299
+ # fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
300
+ # fig.update_annotations(visible=False)
301
+ # fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
302
+ # hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
303
+ # texttemplate="<br><span style='font-family: Arial; font-size: 20px;'>%{customdata[4]}<br><br>"
304
+ # "%{customdata[1]}<br><br>"
305
+ # "<a href='%{customdata[0]}'>PubMed"
306
+ # "</a><br><br><a href='%{customdata[3]}'>GeneCard"
307
+ # "</span></a>")
308
+ # fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
309
+ # # # display the treemap in Streamlit
310
+ # # with treemap2:
311
+ #
312
+ # # st.pyplot(fig2)
313
+ # st.plotly_chart(fig, use_container_width=True)
314
+ #
315
+ # st.caption(
316
+ # "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
317
+ # st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
318
+ # st.caption("Gene information provided by GeneCards: https://www.genecards.org//")
319
+ #
320
+ # csv = df1.head(value_gene).to_csv().encode('utf-8')
321
+ # st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
322
+ # file_name=f'{database_name}_genes.csv', mime='text/csv')
323
+ #
324
+ #
325
+ # except:
326
+ # st.warning(f"No similar genes related to {query} within the {database_name} corpus were found.")
327
 
328
st.markdown("---")

# ---- Human-gene treemap: corpus terms that match HGNC gene symbols,
# ---- ranked by their word2vec similarity to the query term. ----
df1 = table.copy()
df2 = pd.read_csv('Human Genes.csv')
# Keep only rows whose word is a recognized human gene symbol.
gene_mask = df1.Word.isin(df2.symbol)
df1 = df1[gene_mask]
df1.rename(columns={'Word': 'Genes'}, inplace=True)
df_len = len(df1)
print(len(df1))

# Show at most 100 genes in the treemap.
value_gene = min(df_len, 100)

st.markdown(
    f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
    f"</span>human genes contextually and semantically similar to "
    f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name} </span>corpus. Click on the squares to expand and also the Pubmed and GeneCard links for more gene information</span></p></b>",
    unsafe_allow_html=True)

df11 = df1.head(value_gene).copy()

# Rank index -> tile size: lower (better) ranks get larger tiles.
df11.index = (1 / df11.index) * 10000
sizes = df11.index.tolist()

df11.set_index('Genes', inplace=True)

# Parallel frame carrying a human-readable similarity label for hover text.
df4 = df1.copy()
df4["SIMILARITY"] = 'Similarity Score ' + df4.head(value_gene)["SIMILARITY"].round(2).astype(str)
df4.reset_index(inplace=True)

if value_gene <= df_len:
    # Tile labels plus per-gene PubMed and GeneCards links for the template.
    df11['text'] = df11.index
    pubmed_base = (f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                   '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+')
    df11['href'] = [pubmed_base + gene for gene in df11['text']]
    df11['href2'] = ['https://www.genecards.org/cgi-bin/carddisp.pl?gene=' + gene for gene in df11['text']]
    df11['database'] = database_name

    # Build the clickable treemap.
    fig = px.treemap(df11, path=[df11['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
                     hover_name=(df4.head(value_gene)['SIMILARITY']))

    fig.update(layout_coloraxis_showscale=False)
    fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
    fig.update_annotations(visible=False)
    fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                      hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                      texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                   "<a href='%{customdata[0]}'>PubMed"
                                   "</a><br><br><a href='%{customdata[2]}'>GeneCard"
                                   "</span></a>")
    fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightPink"])

    st.plotly_chart(fig, use_container_width=True)

    st.caption("Human gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
    st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
    st.caption("Gene information provided by GeneCards: https://www.genecards.org//")

    csv = df1.head(value_gene).to_csv().encode('utf-8')
    st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
                       file_name=f'{database_name}_genes.csv', mime='text/csv')
else:
    st.warning(
        f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
st.markdown("---")
418
# ---- Drug treemap: corpus terms that match KEGG drug names, ranked by
# ---- their word2vec similarity to the query term. ----
df1 = table.copy()
df2 = pd.read_csv('kegg_drug_list_lowercase.csv')
# Keep only rows whose word is a known KEGG drug name.
drug_mask = df1.Word.isin(df2.drugs)
df1 = df1[drug_mask]
df1.rename(columns={'Word': 'Drugs'}, inplace=True)
df_len = len(df1)

# Show at most 100 drugs in the treemap.
value_drug = min(df1.shape[0], 100)

st.markdown(
    f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_drug} "
    f"</span>Drugs contextually and semantically similar to "
    f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
    unsafe_allow_html=True)

df13 = df1.head(value_drug).copy()

# Rank index -> tile size: lower (better) ranks get larger tiles.
df13.index = (1 / df13.index) * 10000
sizes = df13.index.tolist()

df13.set_index('Drugs', inplace=True)

# Parallel frame carrying a human-readable similarity label for hover text.
df6 = df1.copy()
df6["SIMILARITY"] = 'Similarity Score ' + df6.head(value_drug)["SIMILARITY"].round(2).astype(str)
df6.reset_index(inplace=True)

if value_drug <= df_len:
    # Hyphens render poorly as tile labels; replace them with spaces.
    df13.index = df13.index.str.replace('-', ' ')
    df13['text'] = df13.index
    pubmed_base = (f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                   '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+')
    df13['href'] = [pubmed_base + drug for drug in df13['text']]
    df13['href2'] = ['https://en.wikipedia.org/wiki/' + drug for drug in df13['text']]
    df13['database'] = database_name

    # Build the clickable treemap.
    fig = px.treemap(df13, path=[df13['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
                     hover_name=(df6.head(value_drug)['SIMILARITY']))

    fig.update(layout_coloraxis_showscale=False)
    fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
    fig.update_annotations(visible=False)
    fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                      hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                      texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                   "<a href='%{customdata[0]}'>PubMed"
                                   "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
                                   "</span></a>")
    fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["Thistle"])

    st.plotly_chart(fig, use_container_width=True)

    st.caption(
        "Drug designation and database provided by KEGG: https://www.kegg.jp/kegg/drug/")

    csv = df1.head(value_drug).to_csv().encode('utf-8')
    st.download_button(label=f"download top {value_drug} drugs (csv)", data=csv,
                       file_name=f'{database_name}_drugs.csv', mime='text/csv')
else:
    st.warning(
        f"This selection exceeds the number of similar drugs related to {query} within the {database_name} corpus, please choose a lower number")
st.markdown("---")
 
518
  #
519
+ # st.markdown("---")
520
+ # # print()
521
+ # # print("Human genes similar to " + str(query))
522
+ # df1 = table.copy()
523
+ # df2 = pd.read_csv('diseasesKegg.csv')
524
+ # m = df1.Word.isin(df2.disease)
525
+ # df1 = df1[m]
526
+ # df1.rename(columns={'Word': 'Disease'}, inplace=True)
527
+ # df_len = len(df1)
528
+ # # print(len(df1))
529
+ # # df1["Human Gene"] = df1["Human Gene"].str.upper()
530
+ # # print(df1.head(50))
531
+ # # print()
532
+ # # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
533
+ # # time.sleep(2)
534
+ # # Create the slider with increments of 5 up to 100
535
+ #
536
+ # # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
537
+ # value_disease = min(df1.shape[0], 100)
538
+ #
539
+ # # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
540
+ # # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
541
+ # # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
542
+ # # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
543
+ # # unsafe_allow_html=True)
544
+ #
545
+ # st.markdown(
546
+ # f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_disease} "
547
+ # f"</span>Diseases contextually and semantically similar to "
548
+ # f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
549
+ # unsafe_allow_html=True)
550
+ #
551
+ # df14 = df1.head(value_disease).copy()
552
+ #
553
+ # df14.index = (1 / df14.index) * 10000
554
+ # sizes = df14.index.tolist()
555
+ #
556
+ # df14.set_index('Disease', inplace=True)
557
+ #
558
+ # df7 = df1.copy()
559
+ # # print(df4.head(10))
560
+ # df7["SIMILARITY"] = 'Similarity Score ' + df7.head(value_disease)["SIMILARITY"].round(2).astype(str)
561
+ # df7.reset_index(inplace=True)
562
+ # # df4 = df4.rename(columns={'Protein': 'symbol2'})
563
+ # # print(df4)
564
+ # # # Use df.query to get a subset of df1 based on ids in df2
565
+ # # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
566
+ # # # Use merge to join the two DataFrames on id
567
+ # # result = pd.merge(subset, df2b, on='symbol2')
568
+ # # print(result)
569
+ # if value_disease <= df_len:
570
+ # # Define the `text` column for labels and `href` column for links
571
+ # # Reset the index
572
+ # df14.reset_index(inplace=True)
573
+ #
574
+ # # Replace hyphens with spaces in the 'text' column
575
+ # df14['Disease'] = df14['Disease'].str.replace('-', ' ')
576
+ #
577
+ # # Set the 'text' column back as the index
578
+ # df14.set_index('Disease', inplace=True)
579
+ # df14['text'] = df14.index
580
+ # df14['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
581
+ # '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df14['text']]
582
+ # df14['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in df14['text']]
583
+ # assert isinstance(df14, object)
584
+ # df14['database'] = database_name
585
+ #
586
+ # # df11['name'] = [c for c in result['Approved name']]
587
+ #
588
+ # # Create the treemap using `px.treemap`
589
+ # fig = px.treemap(df14, path=[df14['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
590
+ # hover_name=(df7.head(value_disease)['SIMILARITY']))
591
+ #
592
+ # fig.update(layout_coloraxis_showscale=False)
593
+ # fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
594
+ # fig.update_annotations(visible=False)
595
+ # fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
596
+ # hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
597
+ # texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
598
+ # "<a href='%{customdata[0]}'>PubMed"
599
+ # "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
600
+ # "</span></a>")
601
+ # fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["PaleGoldenRod"])
602
+ # # # display the treemap in Streamlit
603
+ # # with treemap2:
604
+ #
605
+ # # st.pyplot(fig2)
606
+ # st.plotly_chart(fig, use_container_width=True)
607
+ #
608
+ # st.caption("Disease designation and database provided by KEGG: https://www.genome.jp/kegg/disease/")
609
+ #
610
+ # csv = df1.head(value_disease).to_csv().encode('utf-8')
611
+ # st.download_button(label=f"download top {value_disease} diseases (csv)", data=csv,
612
+ # file_name=f'{database_name}_disease.csv', mime='text/csv')
613
+ #
614
+ #
615
+ # else:
616
+ # st.warning(
617
+ # f"This selection exceeds the number of similar diseases related to {query} within the {database_name} corpus, please choose a lower number")
618
+ # st.markdown("---")
619
+
620
+ # st.markdown("---")
621
+ # # print()
622
+ # # print("Human genes similar to " + str(query))
623
+ # df1 = table.copy()
624
+ # df2 = pd.read_csv('pathwaysKegg.csv')
625
+ # m = df1.Word.isin(df2.pathway)
626
+ # df1 = df1[m]
627
+ # df1.rename(columns={'Word': 'Pathway'}, inplace=True)
628
+ # df_len = len(df1)
629
+ # # print(len(df1))
630
+ # # df1["Human Gene"] = df1["Human Gene"].str.upper()
631
+ # # print(df1.head(50))
632
+ # # print()
633
+ # # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
634
+ # # time.sleep(2)
635
+ # # Create the slider with increments of 5 up to 100
636
+ #
637
+ # # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
638
+ # value_pathway = min(df1.shape[0], 100)
639
+ #
640
+ # # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
641
+ # # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
642
+ # # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
643
+ # # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
644
+ # # unsafe_allow_html=True)
645
+ #
646
+ # st.markdown(
647
+ # f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_pathway} "
648
+ # f"</span>Pathways contextually and semantically similar to "
649
+ # f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
650
+ # unsafe_allow_html=True)
651
+ #
652
+ # df16 = df1.head(value_pathway).copy()
653
+ #
654
+ # df16.index = (1 / df16.index) * 10000
655
+ # sizes = df16.index.tolist()
656
+ #
657
+ # df16.set_index('Pathway', inplace=True)
658
+ #
659
+ # df9 = df1.copy()
660
+ # # print(df4.head(10))
661
+ # df9["SIMILARITY"] = 'Similarity Score ' + df9.head(value_pathway)["SIMILARITY"].round(2).astype(str)
662
+ # df9.reset_index(inplace=True)
663
+ # # df4 = df4.rename(columns={'Protein': 'symbol2'})
664
+ # # print(df4)
665
+ # # # Use df.query to get a subset of df1 based on ids in df2
666
+ # # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
667
+ # # # Use merge to join the two DataFrames on id
668
+ # # result = pd.merge(subset, df2b, on='symbol2')
669
+ # # print(result)
670
+ # if value_pathway <= df_len:
671
+ # # Define the `text` column for labels and `href` column for links
672
+ # # Reset the index
673
+ # df16.reset_index(inplace=True)
674
+ #
675
+ # # Replace hyphens with spaces in the 'text' column
676
+ # df16['Pathway'] = df16['Pathway'].str.replace('-', ' ')
677
+ #
678
+ # # Set the 'text' column back as the index
679
+ # df16.set_index('Pathway', inplace=True)
680
+ # df16['text'] = df16.index
681
+ # df16['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
682
+ # '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df16['text']]
683
+ # df16['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in df16['text']]
684
+ # assert isinstance(df16, object)
685
+ # df16['database'] = database_name
686
+ #
687
+ # # df11['name'] = [c for c in result['Approved name']]
688
+ #
689
+ # # Create the treemap using `px.treemap`
690
+ # fig = px.treemap(df16, path=[df16['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
691
+ # hover_name=(df9.head(value_pathway)['SIMILARITY']))
692
+ #
693
+ # fig.update(layout_coloraxis_showscale=False)
694
+ # fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
695
+ # fig.update_annotations(visible=False)
696
+ # fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
697
+ # hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
698
+ # texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
699
+ # "<a href='%{customdata[0]}'>PubMed"
700
+ # "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
701
+ # "</span></a>")
702
+ # fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["FloralWhite"])
703
+ # # # display the treemap in Streamlit
704
+ # # with treemap2:
705
+ #
706
+ # # st.pyplot(fig2)
707
+ # st.plotly_chart(fig, use_container_width=True)
708
+ #
709
+ # st.caption("Pathway designation and database provided by KEGG: https://www.genome.jp/kegg/pathway.html")
710
+ #
711
+ # csv = df1.head(value_pathway).to_csv().encode('utf-8')
712
+ # st.download_button(label=f"download top {value_pathway} pathways (csv)", data=csv,
713
+ # file_name=f'{database_name}_pathways.csv', mime='text/csv')
714
+ #
715
+ #
716
+ # else:
717
+ # st.warning(
718
+ # f"This selection exceeds the number of similar pathways related to {query} within the {database_name} corpus, please choose a lower number")
719
+ # st.markdown("---")
720
 
721
st.markdown("---")
# ---- Phytochemical treemap: corpus terms that match the PhytoHub list,
# ---- ranked by their word2vec similarity to the query term. ----
df1 = table.copy()
df2 = pd.read_csv('phytochemicals.csv')
# Keep only rows whose word is a known phytochemical name.
phyto_mask = df1.Word.isin(df2.phyto)
df1 = df1[phyto_mask]
df1.rename(columns={'Word': 'Phytochemical'}, inplace=True)
df_len = len(df1)

# Show at most 100 phytochemicals in the treemap.
value_phyto = min(df1.shape[0], 100)

st.markdown(
    f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_phyto} "
    f"</span>Phytochemicals contextually and semantically similar to "
    f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
    f"Click on the squares to expand and also the Pubmed and Wikipedia links for more compound information</span></p></b>",
    unsafe_allow_html=True)

df15 = df1.head(value_phyto).copy()

# Rank index -> tile size: lower (better) ranks get larger tiles.
df15.index = (1 / df15.index) * 10000
sizes = df15.index.tolist()

df15.set_index('Phytochemical', inplace=True)

# Parallel frame carrying a human-readable similarity label for hover text.
df8 = df1.copy()
df8["SIMILARITY"] = 'Similarity Score ' + df8.head(value_phyto)["SIMILARITY"].round(2).astype(str)
df8.reset_index(inplace=True)

# NOTE: value_phyto == min(df_len, 100), so this condition is always true;
# the else branch is retained only as a guard against future changes.
if value_phyto <= df_len:
    # Hyphens render poorly as tile labels; replace them with spaces.
    df15.reset_index(inplace=True)
    df15['Phytochemical'] = df15['Phytochemical'].str.replace('-', ' ')
    df15.set_index('Phytochemical', inplace=True)

    # Tile labels plus per-item PubMed and Wikipedia links for the template.
    df15['text'] = df15.index
    df15['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D'
                    '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df15['text']]
    df15['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in df15['text']]
    df15['database'] = database_name

    # Build the clickable treemap.
    fig = px.treemap(df15, path=[df15['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
                     hover_name=(df8.head(value_phyto)['SIMILARITY']))

    fig.update(layout_coloraxis_showscale=False)
    fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
    fig.update_annotations(visible=False)
    fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                      hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
                      texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
                                   "<a href='%{customdata[0]}'>PubMed"
                                   "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
                                   "</span></a>")
    fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightSeaGreen"])

    st.plotly_chart(fig, use_container_width=True)

    st.caption("Phytochemical designation and database provided by PhytoHub: https://phytohub.eu/")

    csv = df1.head(value_phyto).to_csv().encode('utf-8')
    st.download_button(label=f"download top {value_phyto} phytochemicals (csv)", data=csv,
                       file_name=f'{database_name}_phytochemicals.csv', mime='text/csv')
else:
    # Fixed user-facing typo: "pythochemicals" -> "phytochemicals".
    st.warning(
        f"This selection exceeds the number of similar phytochemicals related to {query} within the {database_name} corpus, please choose a lower number")
st.markdown("---")
822
 
823
  # print()
824
  # print("Human genes similar to " + str(query))
825
  df1 = table.copy()
826
+ df2 = pd.read_csv('kegg_compounds_lowercase.csv')
827
+ m = df1.Word.isin(df2.compound)
828
  df1 = df1[m]
829
+ df1.rename(columns={'Word': 'Compounds'}, inplace=True)
 
830
  df_len = len(df1)
831
+ # df1["Human Gene"] = df1["Human Gene"].str.upper()
832
  # print(df1.head(50))
833
  # print()
834
  # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
835
  # time.sleep(2)
836
  # Create the slider with increments of 5 up to 100
837
 
838
+ # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
839
+ value_compound = min(df1.shape[0], 100)
840
+
841
+ # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
842
+ # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
843
+ # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
844
+ # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
845
+ # unsafe_allow_html=True)
846
+
847
  st.markdown(
848
+ f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_compound} "
849
+ f"</span>Compounds contextually and semantically similar to "
850
+ f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
851
+ f"Click on the squares to expand and the Pubmed, Wikipedia, and KEGG links for more compound information (may take time to load)</span></p></b>",
852
  unsafe_allow_html=True)
853
+
854
+ df12 = df1.head(value_compound).copy()
855
+
856
+ df12.index = (1 / df12.index) * 10000
857
+ sizes = df12.index.tolist()
858
+
859
+ df12.set_index('Compounds', inplace=True)
860
+
861
+ df5 = df1.copy()
862
+ # print(df4.head(10))
863
+ df5["SIMILARITY"] = 'Similarity Score ' + df5.head(value_compound)["SIMILARITY"].round(2).astype(str)
864
+ df5.reset_index(inplace=True)
865
+ # df4 = df4.rename(columns={'Protein': 'symbol2'})
866
+ # print(df4)
867
+ # # Use df.query to get a subset of df1 based on ids in df2
868
+ # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
869
+ # # Use merge to join the two DataFrames on id
870
+ # result = pd.merge(subset, df2b, on='symbol2')
871
+ # print(result)
872
+
873
+ if value_compound <= df_len:
874
+ # Define the `text` column for labels and `href` column for links
875
+ # Reset the index
876
+ df12.reset_index(inplace=True)
877
+
878
+ # Replace hyphens with spaces in the 'text' column
879
+ df12['Compounds'] = df12['Compounds'].str.replace('-', ' ')
880
+
881
+ # Set the 'text' column back as the index
882
+ df12.set_index('Compounds', inplace=True)
883
+ df12['text'] = df12.index
884
+ df12['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
885
+ '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df12['text']]
886
+ df12['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in df12['text']]
887
+ df12['href3'] = [f'https://www.genome.jp/entry/{compound_id}' for compound_id in get_compound_ids(df12['text'])]
888
+ assert isinstance(df12, object)
889
+ df12['database'] = database_name
890
+
891
+ # df11['name'] = [c for c in result['Approved name']]
892
+
893
+ # Create the treemap using `px.treemap`
894
+ fig = px.treemap(df12, path=[df12['text']], values=sizes,
895
+ custom_data=['href', 'database', 'href2', 'text', 'href3'],
896
+ hover_name=(df5.head(value_compound)['SIMILARITY']))
897
+
898
+ fig.update(layout_coloraxis_showscale=False)
899
+ fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
900
+ fig.update_annotations(visible=False)
901
+ fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
902
  hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
903
+ texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
904
  "<a href='%{customdata[0]}'>PubMed"
905
+ "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
906
+ "</a><br><br><a href='%{customdata[4]}'>KEGG Compound Page"
907
  "</span></a>")
 
 
 
908
 
909
+ fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightYellow"])
910
+ # # display the treemap in Streamlit
911
+ # with treemap2:
912
 
913
+ # st.pyplot(fig2)
914
+ st.plotly_chart(fig, use_container_width=True)
915
 
916
+ st.caption("Compound designation and database provided by KEGG: https://www.kegg.jp/kegg/compound/")
 
 
917
 
918
+ csv = df1.head(value_compound).to_csv().encode('utf-8')
919
+ st.download_button(label=f"download top {value_compound} compounds (csv)", data=csv,
920
+ file_name=f'{database_name}_compounds.csv', mime='text/csv')
921
 
922
+
923
+ else:
924
+ st.warning(
925
+ f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
926
  st.markdown("---")
927
 
928
 
929
def save_comment(comment):
    """Append a single user comment as one line to the comments.txt log.

    Args:
        comment: Free-text feedback from the UI; written verbatim followed
            by a newline.
    """
    # Explicit UTF-8 avoids platform-dependent default encodings (e.g.
    # cp1252 on Windows) mangling non-ASCII feedback.
    with open('comments.txt', 'a', encoding='utf-8') as f:
        f.write(f'{comment}\n')
932
+
933
+
934
def save_comment_threaded(comment):
    """Persist a user comment on a background thread so the UI thread
    is not blocked on disk I/O."""
    worker = threading.Thread(target=save_comment, args=(comment,))
    worker.start()
937
+
938
+
939
st.title("Abstractalytics Web App")
st.write("We appreciate your feedback!")

# Anonymous feedback form; comments are saved via a background thread.
user_comment = st.text_area("Please send us your anonymous remarks/suggestions about the Abstractalytics Web App: "
                            "(app will pause while we save your comments)")

submitted = st.button("Submit")
if submitted and user_comment:
    save_comment_threaded(user_comment)
    st.success("Your comment has been saved. Thank you for your feedback!")
elif submitted:
    # Button pressed with an empty text area.
    st.warning("Please enter a comment before submitting.")

st.markdown("---")
953
+
954
  st.subheader("Cancer-related videos")
955
  if query:
956
+ idlist = []
957
  search_keyword = {query}
958
  html = urllib.request.urlopen("https://www.youtube.com/@NCIgov/search?query=cancer")
959
  html2 = urllib.request.urlopen("https://www.youtube.com/@CancerCenter/search?query=cancer")
 
979
 
980
  c1, c2, c3 = st.columns(3)
981
 
 
982
  with c1:
983
+ st.video("https://www.youtube.com/watch?v=" + video_ids[0])
984
  with c2:
985
+ st.video("https://www.youtube.com/watch?v=" + video_ids[1])
986
  with c3:
987
+ st.video("https://www.youtube.com/watch?v=" + video_ids[2])
988
  st.markdown("---")
989
 
990
+ # else:
991
+ # st.error("The password you entered is incorrect.")
992
+
993
+
994
+
995
+
996
+
997
+
998
+
999
+
1000
+
1001
+
1002
+
1003
+
1004
+
1005
+
1006
 
1007
 
1008