Browse files
@@ -76,7 +76,8 @@ st.markdown("---")
76 |
# # If the password is correct, show the app content
77 |
# if authenticate(password):
78 |
opt ="Select a PubMed Corpus", options=('Breast Cancer corpus', 'Lung Cancer corpus',
79 |
80 |
# if opt == "Clotting corpus":
81 |
# model_used = ("pubmed_model_clotting")
82 |
# num_abstracts = 45493
@@ -105,6 +106,10 @@ if opt == "Prostate Cancer corpus":
105 |
model_used = ("prostate_cancer_pubmed_model")
106 |
num_abstracts = 89782
107 |
database_name = "Prostate_cancer"
108 |
109 |
st.header(f":blue[{database_name} Pubmed corpus.]")
110 |
text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
@@ -126,111 +131,118 @@ if query:
126 |
bar.progress((i + 1) * 10)
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
# except:
160 |
# st.error("Term occurrence is too low - please try another term")
161 |
# st.stop()
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
173 |
# f"<span style='color:red; font-style: italic;'>words</span> contextually "
174 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
175 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
176 |
# unsafe_allow_html=True)
177 |
178 |
# Set the max number of words to display
179 |
value_word = min(100, len(table2))
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
short_table.index = (1 / short_table.index) * 10
191 |
sizes = short_table.index.tolist()
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000", texttemplate="<br><span "
214 |
"style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
215 |
"<a href='%{customdata[0]}'>PubMed"
216 |
"</a><br><br><a href='%{customdata[3]}'>Wikipedia"
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
file_name=f'{database_name}_words.csv', mime='text/csv')
230 |
231 |
232 |
f"This selection exceeds the number of similar words related to {query} within the {database_name} corpus, please choose a lower number")
233 |
234 |
# st.markdown("---")
235 |
# # st.write(short_table)
236 |
# #
@@ -334,669 +346,673 @@ if query:
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
df6 = df1.copy()
465 |
# print(df4.head(10))
466 |
df6["SIMILARITY"] = 'Similarity Score ' + df6.head(value_drug)["SIMILARITY"].round(2).astype(str)
467 |
468 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
469 |
# print(df4)
470 |
# # Use df.query to get a subset of df1 based on ids in df2
471 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
472 |
# # Use merge to join the two DataFrames on id
473 |
# result = pd.merge(subset, df2b, on='symbol2')
474 |
# print(result)
475 |
if value_drug <= df_len:
476 |
# Define the `text` column for labels and `href` column for links
477 |
# Reset the index
478 |
479 |
480 |
# Replace hyphens with spaces in the 'Drugs' column
481 |
df13['Drugs'] = df13['Drugs'].str.replace('-', ' ')
482 |
483 |
# Set the 'Drugs' column back as the index
484 |
df13.set_index('Drugs', inplace=True)
485 |
df13['text'] = df13.index
486 |
df13['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
487 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df13['text']]
488 |
df13['href2'] = [f'' + c for c in df13['text']]
489 |
assert isinstance(df13, object)
490 |
df13['database'] = database_name
491 |
492 |
# df11['name'] = [c for c in result['Approved name']]
493 |
494 |
# Create the treemap using `px.treemap`
495 |
fig = px.treemap(df13, path=[df13['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
496 |
497 |
498 |
499 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
500 |
501 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
502 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
503 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
504 |
"<a href='%{customdata[0]}'>PubMed"
505 |
"</a><br><br><a href='%{customdata[2]}'>Wikipedia"
506 |
507 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["Thistle"])
508 |
# # display the treemap in Streamlit
509 |
# with treemap2:
510 |
511 |
# st.pyplot(fig2)
512 |
st.plotly_chart(fig, use_container_width=True)
513 |
514 |
st.caption("Drug designation and database provided by KEGG:")
515 |
516 |
csv = df1.head(value_drug).to_csv().encode('utf-8')
517 |
st.download_button(label=f"download top {value_drug} drugs (csv)", data=csv,
518 |
file_name=f'{database_name}_drugs.csv', mime='text/csv')
519 |
520 |
521 |
522 |
523 |
f"This selection exceeds the number of similar drugs related to {query} within the {database_name} corpus, please choose a lower number")
524 |
525 |
526 |
# st.markdown("---")
527 |
# # print()
528 |
# # print("Human genes similar to " + str(query))
529 |
# df1 = table.copy()
530 |
# df2 = pd.read_csv('diseasesKegg.csv')
531 |
# m = df1.Word.isin(df2.disease)
532 |
# df1 = df1[m]
533 |
# df1.rename(columns={'Word': 'Disease'}, inplace=True)
534 |
# df_len = len(df1)
535 |
# # print(len(df1))
536 |
# # df1["Human Gene"] = df1["Human Gene"].str.upper()
537 |
# # print(df1.head(50))
538 |
# # print()
539 |
# # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
540 |
# # time.sleep(2)
541 |
# # Create the slider with increments of 5 up to 100
542 |
543 |
# # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
544 |
# value_disease = min(df1.shape[0], 100)
545 |
546 |
# # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
547 |
# # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
548 |
# # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
549 |
# # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
550 |
# # unsafe_allow_html=True)
551 |
552 |
# st.markdown(
553 |
# f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_disease} "
554 |
# f"</span>Diseases contextually and semantically similar to "
555 |
# f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
556 |
# unsafe_allow_html=True)
557 |
558 |
# df14 = df1.head(value_disease).copy()
559 |
560 |
# df14.index = (1 / df14.index) * 10000
561 |
# sizes = df14.index.tolist()
562 |
563 |
# df14.set_index('Disease', inplace=True)
564 |
565 |
# df7 = df1.copy()
566 |
# # print(df4.head(10))
567 |
# df7["SIMILARITY"] = 'Similarity Score ' + df7.head(value_disease)["SIMILARITY"].round(2).astype(str)
568 |
# df7.reset_index(inplace=True)
569 |
# # df4 = df4.rename(columns={'Protein': 'symbol2'})
570 |
# # print(df4)
571 |
# # # Use df.query to get a subset of df1 based on ids in df2
572 |
# # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
573 |
# # # Use merge to join the two DataFrames on id
574 |
# # result = pd.merge(subset, df2b, on='symbol2')
575 |
# # print(result)
576 |
# if value_disease <= df_len:
577 |
# # Define the `text` column for labels and `href` column for links
578 |
# # Reset the index
579 |
# df14.reset_index(inplace=True)
580 |
581 |
# # Replace hyphens with spaces in the 'text' column
582 |
# df14['Disease'] = df14['Disease'].str.replace('-', ' ')
583 |
584 |
# # Set the 'text' column back as the index
585 |
# df14.set_index('Disease', inplace=True)
586 |
# df14['text'] = df14.index
587 |
# df14['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
588 |
# '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df14['text']]
589 |
# df14['href2'] = [f'' + c for c in df14['text']]
590 |
# assert isinstance(df14, object)
591 |
# df14['database'] = database_name
592 |
593 |
# # df11['name'] = [c for c in result['Approved name']]
594 |
595 |
# # Create the treemap using `px.treemap`
596 |
# fig = px.treemap(df14, path=[df14['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
597 |
# hover_name=(df7.head(value_disease)['SIMILARITY']))
598 |
599 |
# fig.update(layout_coloraxis_showscale=False)
600 |
# fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
601 |
# fig.update_annotations(visible=False)
602 |
# fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
603 |
# hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
604 |
# texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
605 |
# "<a href='%{customdata[0]}'>PubMed"
606 |
# "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
607 |
# "</span></a>")
608 |
# fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["PaleGoldenRod"])
609 |
# # # display the treemap in Streamlit
610 |
# # with treemap2:
611 |
612 |
# # st.pyplot(fig2)
613 |
# st.plotly_chart(fig, use_container_width=True)
614 |
615 |
# st.caption("Disease designation and database provided by KEGG:")
616 |
617 |
# csv = df1.head(value_disease).to_csv().encode('utf-8')
618 |
# st.download_button(label=f"download top {value_disease} diseases (csv)", data=csv,
619 |
# file_name=f'{database_name}_disease.csv', mime='text/csv')
620 |
621 |
622 |
# else:
623 |
# st.warning(
624 |
# f"This selection exceeds the number of similar diseases related to {query} within the {database_name} corpus, please choose a lower number")
625 |
# st.markdown("---")
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
# print()
730 |
# print("Human genes similar to " + str(query))
731 |
df1 = table.copy()
732 |
df2 = pd.read_csv('phytochemicals.csv')
733 |
m = df1.Word.isin(df2.phyto)
734 |
df1 = df1[m]
735 |
df1.rename(columns={'Word': 'Phytochemical'}, inplace=True)
736 |
df_len = len(df1)
737 |
# print(len(df1))
738 |
# df1["Human Gene"] = df1["Human Gene"].str.upper()
739 |
# print(df1.head(50))
740 |
# print()
741 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
742 |
# time.sleep(2)
743 |
# Create the slider with increments of 5 up to 100
744 |
745 |
# Slider removed: set value_phyto to the minimum of the number of rows in the dataframe and 100
746 |
value_phyto = min(df1.shape[0], 100)
747 |
748 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
749 |
# f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
750 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
751 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
752 |
# unsafe_allow_html=True)
753 |
754 |
755 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_phyto} "
756 |
f"</span>Phytochemicals contextually and semantically similar to "
757 |
f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
758 |
f"Click on the squares to expand and also the Pubmed and Wikipedia links for more compound information</span></p></b>",
759 |
760 |
761 |
df15 = df1.head(value_phyto).copy()
762 |
763 |
df15.index = (1 / df15.index) * 10000
764 |
sizes = df15.index.tolist()
765 |
766 |
df15.set_index('Phytochemical', inplace=True)
767 |
768 |
df8 = df1.copy()
769 |
# print(df4.head(10))
770 |
df8["SIMILARITY"] = 'Similarity Score ' + df8.head(value_phyto)["SIMILARITY"].round(2).astype(str)
771 |
772 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
773 |
# print(df4)
774 |
# # Use df.query to get a subset of df1 based on ids in df2
775 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
776 |
# # Use merge to join the two DataFrames on id
777 |
# result = pd.merge(subset, df2b, on='symbol2')
778 |
# print(result)
779 |
if value_phyto <= df_len:
780 |
# Define the `text` column for labels and `href` column for links
781 |
# Reset the index
782 |
783 |
784 |
# Replace hyphens with spaces in the 'Phytochemical' column
785 |
df15['Phytochemical'] = df15['Phytochemical'].str.replace('-', ' ')
786 |
787 |
# Set the 'Phytochemical' column back as the index
788 |
df15.set_index('Phytochemical', inplace=True)
789 |
df15['text'] = df15.index
790 |
df15['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
791 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df15['text']]
792 |
df15['href2'] = [f'' + c for c in df15['text']]
793 |
assert isinstance(df15, object)
794 |
df15['database'] = database_name
795 |
796 |
# df11['name'] = [c for c in result['Approved name']]
797 |
798 |
# Create the treemap using `px.treemap`
799 |
fig = px.treemap(df15, path=[df15['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
800 |
801 |
802 |
803 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
804 |
805 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
806 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
807 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
808 |
"<a href='%{customdata[0]}'>PubMed"
809 |
"</a><br><br><a href='%{customdata[2]}'>Wikipedia"
810 |
811 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightSeaGreen"])
812 |
# # display the treemap in Streamlit
813 |
# with treemap2:
814 |
815 |
# st.pyplot(fig2)
816 |
st.plotly_chart(fig, use_container_width=True)
817 |
818 |
st.caption("Phytochemical designation and database provided by PhytoHub:")
819 |
820 |
csv = df1.head(value_phyto).to_csv().encode('utf-8')
821 |
st.download_button(label=f"download top {value_phyto} phytochemicals (csv)", data=csv,
822 |
file_name=f'{database_name}_phytochemicals.csv', mime='text/csv')
823 |
824 |
825 |
826 |
827 |
f"This selection exceeds the number of similar pythochemicals related to {query} within the {database_name} corpus, please choose a lower number")
828 |
829 |
830 |
# print()
831 |
# print("Human genes similar to " + str(query))
832 |
df1 = table.copy()
833 |
df2 = pd.read_csv('kegg_compounds_lowercase.csv')
834 |
m = df1.Word.isin(df2.compound)
835 |
df1 = df1[m]
836 |
df1.rename(columns={'Word': 'Compounds'}, inplace=True)
837 |
df_len = len(df1)
838 |
# df1["Human Gene"] = df1["Human Gene"].str.upper()
839 |
# print(df1.head(50))
840 |
# print()
841 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
842 |
# time.sleep(2)
843 |
# Create the slider with increments of 5 up to 100
844 |
845 |
# Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
846 |
value_compound = min(df1.shape[0], 100)
847 |
848 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
849 |
# f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
850 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
851 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
852 |
# unsafe_allow_html=True)
853 |
854 |
855 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_compound} "
856 |
f"</span>Compounds contextually and semantically similar to "
857 |
f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
858 |
f"Click on the squares to expand and the Pubmed, Wikipedia, and KEGG links for more compound information (may take time to load)</span></p></b>",
859 |
860 |
861 |
df12 = df1.head(value_compound).copy()
862 |
863 |
df12.index = (1 / df12.index) * 10000
864 |
sizes = df12.index.tolist()
865 |
866 |
df12.set_index('Compounds', inplace=True)
867 |
868 |
df5 = df1.copy()
869 |
# print(df4.head(10))
870 |
df5["SIMILARITY"] = 'Similarity Score ' + df5.head(value_compound)["SIMILARITY"].round(2).astype(str)
871 |
872 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
873 |
# print(df4)
874 |
# # Use df.query to get a subset of df1 based on ids in df2
875 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
876 |
# # Use merge to join the two DataFrames on id
877 |
# result = pd.merge(subset, df2b, on='symbol2')
878 |
# print(result)
879 |
880 |
if value_compound <= df_len:
881 |
# Define the `text` column for labels and `href` column for links
882 |
# Reset the index
883 |
884 |
885 |
# Replace hyphens with spaces in the 'Compounds' column
886 |
df12['Compounds'] = df12['Compounds'].str.replace('-', ' ')
887 |
888 |
# Set the 'Compounds' column back as the index
889 |
df12.set_index('Compounds', inplace=True)
890 |
df12['text'] = df12.index
891 |
df12['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
892 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df12['text']]
893 |
df12['href2'] = [f'' + c for c in df12['text']]
894 |
df12['href3'] = [f'{compound_id}' for compound_id in get_compound_ids(df12['text'])]
895 |
assert isinstance(df12, object)
896 |
df12['database'] = database_name
897 |
898 |
# df11['name'] = [c for c in result['Approved name']]
899 |
900 |
# Create the treemap using `px.treemap`
901 |
fig = px.treemap(df12, path=[df12['text']], values=sizes,
902 |
custom_data=['href', 'database', 'href2', 'text', 'href3'],
903 |
904 |
905 |
906 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
907 |
908 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
909 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
910 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
911 |
"<a href='%{customdata[0]}'>PubMed"
912 |
"</a><br><br><a href='%{customdata[2]}'>Wikipedia"
913 |
"</a><br><br><a href='%{customdata[4]}'>KEGG Compound Page"
914 |
915 |
916 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightYellow"])
917 |
# # display the treemap in Streamlit
918 |
# with treemap2:
919 |
920 |
# st.pyplot(fig2)
921 |
st.plotly_chart(fig, use_container_width=True)
922 |
923 |
st.caption("Compound designation and database provided by KEGG:")
924 |
925 |
csv = df1.head(value_compound).to_csv().encode('utf-8')
926 |
st.download_button(label=f"download top {value_compound} compounds (csv)", data=csv,
927 |
file_name=f'{database_name}_compounds.csv', mime='text/csv')
928 |
929 |
930 |
931 |
932 |
f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
933 |
934 |
935 |
# import os
936 |
937 |
# from datasets import Dataset
938 |
939 |
# # Check if the comments directory exists
940 |
# if os.path.exists('comments'):
941 |
# # Load the dataset from disk
942 |
# dataset = Dataset.load_from_disk('comments')
943 |
# else:
944 |
# # Create a new dataset
945 |
# dataset = Dataset.from_dict({'id': [], 'text': []})
946 |
947 |
# def save_comment(comment):
948 |
# # Check if the dataset exists
949 |
# if os.path.exists('comments'):
950 |
# dataset = Dataset.load_from_disk('comments')
951 |
# else:
952 |
# dataset = Dataset.from_dict({'id': [], 'text': []})
953 |
954 |
955 |
956 |
957 |
958 |
959 |
# dataset.save_to_disk('comments')
960 |
961 |
# print('Comment saved to dataset.')
962 |
963 |
# st.title("Abstractalytics Web App")
964 |
# st.write("We appreciate your feedback!")
965 |
966 |
# user_comment = st.text_area("Please send us your anonymous remarks/suggestions about the Abstractalytics Web App: "
967 |
# "(app will pause while we save your comments)")
968 |
969 |
# if st.button("Submit"):
970 |
# if user_comment:
971 |
# save_comment(user_comment)
972 |
# st.success("Your comment has been saved. Thank you for your feedback!")
973 |
# else:
974 |
# st.warning("Please enter a comment before submitting.")
975 |
976 |
# # Load the comments dataset from disk
977 |
# if os.path.exists('comments'):
978 |
# dataset = Dataset.load_from_disk('comments')
979 |
# else:
980 |
# dataset = Dataset.from_dict({'id': [], 'text': []})
981 |
982 |
# # Access the text column of the dataset
983 |
# comments = dataset['text']
984 |
985 |
# # Define the password
986 |
# PASSWORD = 'ram100pass'
987 |
988 |
# # Prompt the user for the password
989 |
# password = st.text_input('Password:', type='password')
990 |
991 |
# # Display the comments if the password is correct
992 |
# if password == PASSWORD:
993 |
# st.title('Comments')
994 |
# for comment in comments:
995 |
# st.write(comment)
996 |
# else:
997 |
# st.warning('Incorrect password')
998 |
999 |
1000 |
1001 |
st.subheader("Cancer-related videos")
1002 |
if query:
76 |
# # If the password is correct, show the app content
77 |
# if authenticate(password):
78 |
opt ="Select a PubMed Corpus", options=('Breast Cancer corpus', 'Lung Cancer corpus',
79 |
'Skin Cancer corpus', 'Colorectal Cancer corpus',
80 |
'Prostate Cancer corpus'))
81 |
# if opt == "Clotting corpus":
82 |
# model_used = ("pubmed_model_clotting")
83 |
# num_abstracts = 45493
106 |
model_used = ("prostate_cancer_pubmed_model")
107 |
num_abstracts = 89782
108 |
database_name = "Prostate_cancer"
109 |
if opt == "Skin Cancer corpus":
110 |
model_used = ("skin_cancer_pubmed_model")
111 |
num_abstracts = 176568
112 |
database_name = "Skin_cancer"
113 |
114 |
st.header(f":blue[{database_name} Pubmed corpus.]")
115 |
text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
131 |
bar.progress((i + 1) * 10)
132 |
133 |
134 |
135 |
model = Word2Vec.load(f"{model_used}") # you can continue training with the loaded model!
136 |
words = list(model.wv.key_to_index)
137 |
X = model.wv[model.wv.key_to_index]
138 |
# print(model.wv['bfgf'])
139 |
model2 = model.wv[query]
140 |
# print(model.wv.similar_by_word('bfgf', topn=50, restrict_vocab=None))
141 |
df = pd.DataFrame(X)
142 |
143 |
144 |
def get_compound_ids(compound_names):
    """Look up an identifier for every name in ``compound_names`` concurrently.

    Each name is handed to ``get_compound_id`` on a thread pool, since every
    lookup performs an HTTP request and spends most of its time waiting.

    Args:
        compound_names: Iterable of compound name strings.

    Returns:
        list: One result per input name, in input order. An entry is whatever
        ``get_compound_id`` returned for that name (``None`` when no
        identifier was found).
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in input order, so the returned ids
        # line up one-to-one with the names that were passed in.
        return list(executor.map(get_compound_id, compound_names))
148 |
149 |
150 |
import requests
151 |
152 |
153 |
def get_compound_id(compound_name, timeout=10):
    """Fetch the identifier for ``compound_name`` with a single HTTP GET.

    The first tab-separated field on the first line of the response body is
    taken as the identifier.

    Args:
        compound_name: Name to look up; interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely and stall the app.

    Returns:
        The identifier string, or ``None`` when the request fails, the HTTP
        status is not 200, or the first line of the response is empty.
    """
    # NOTE(review): the URL below consists of the compound name only; the
    # service prefix appears to be missing -- confirm against the deployed file.
    url = f"{compound_name}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Treat a network failure like "not found" so that one bad lookup
        # does not abort a whole batch of concurrent lookups.
        return None

    if response.status_code != 200:
        return None

    first_line = response.text.split('\n', 1)[0]
    if not first_line:
        return None
    return first_line.split('\t', 1)[0]
162 |
163 |
164 |
# except:
165 |
# st.error("Term occurrence is too low - please try another term")
166 |
# st.stop()
167 |
168 |
169 |
170 |
table = model.wv.most_similar_cosmul(query, topn=10000)
171 |
table = (pd.DataFrame(table))
172 |
+ = 'Rank'
173 |
table.columns = ['Word', 'SIMILARITY']
174 |
175 |
pd.set_option('display.max_rows', None)
176 |
table2 = table.copy()
177 |
178 |
179 |
180 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
181 |
# f"<span style='color:red; font-style: italic;'>words</span> contextually "
182 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
183 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
184 |
# unsafe_allow_html=True)
185 |
186 |
# Set the max number of words to display
187 |
value_word = min(100, len(table2))
188 |
189 |
190 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
191 |
f"</span>words contextually and semantically similar to "
192 |
f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
193 |
f"Click on the squares to expand and also the PubMed and Wikipedia links for more word information</span></p></b>",
194 |
195 |
196 |
short_table = table2.head(value_word).round(2)
197 |
short_table.index += 1
198 |
short_table.index = (1 / short_table.index) * 10
199 |
sizes = short_table.index.tolist()
200 |
201 |
short_table.set_index('Word', inplace=True)
202 |
table2["SIMILARITY"] = 'Similarity Score ' + table2.head(value_word)["SIMILARITY"].round(2).astype(str)
203 |
rank_num = list(short_table.index.tolist())
204 |
205 |
df = short_table
206 |
207 |
208 |
df['text'] = short_table.index
209 |
df['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
210 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
211 |
df['href2'] = [f'' + c for c in short_table.index]
212 |
213 |
df.loc[:, 'database'] = database_name
214 |
215 |
fig = px.treemap(df, path=[short_table.index], values=sizes, custom_data=['href', 'text', 'database', 'href2'],
216 |
217 |
218 |
219 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
220 |
221 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
222 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000", texttemplate="<br><span "
223 |
"style='font-family: Arial; font-size: 20px;'>%{customdata[1]}<br><br>"
224 |
"<a href='%{customdata[0]}'>PubMed"
225 |
"</a><br><br><a href='%{customdata[3]}'>Wikipedia"
226 |
227 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
228 |
229 |
# st.pyplot(fig2)
230 |
st.plotly_chart(fig, use_container_width=True)
231 |
232 |
# st.caption(
233 |
# "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC):")
234 |
# st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
235 |
236 |
csv = table2.head(value_word).to_csv().encode('utf-8')
237 |
st.download_button(label=f"download top {value_word} words (csv)", data=csv,
238 |
file_name=f'{database_name}_words.csv', mime='text/csv')
239 |
240 |
241 |
242 |
f"This selection exceeds the number of similar words related to {query} within the {database_name} corpus, please choose a lower number")
243 |
except KeyError:
244 |
245 |
"This word is not found in the corpus, it could be because it is not spelled correctly or could be that it does not have enough representation within the corpus, please try again")
246 |
# st.markdown("---")
247 |
# # st.write(short_table)
248 |
# #
346 |
347 |
348 |
349 |
350 |
df1 = table.copy()
351 |
df2 = pd.read_csv('Human Genes.csv')
352 |
m = df1.Word.isin(df2.symbol)
353 |
df1 = df1[m]
354 |
df1.rename(columns={'Word': 'Genes'}, inplace=True)
355 |
df_len = len(df1)
356 |
357 |
358 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap to visualize "
359 |
# f"<span style='color:red; font-style: italic;'>proteins</span> contextually "
360 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
361 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
362 |
# unsafe_allow_html=True)
363 |
364 |
# Set the number of genes to display (at most 100)
365 |
value_gene = min(df_len, 100)
366 |
367 |
368 |
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_gene} "
369 |
f"</span>human genes contextually and semantically similar to "
370 |
f"<span style='color:red; font-style: italic;'>{query} </span>within the <span style='color:red; font-style: italic;'>{database_name} </span>corpus. Click on the squares to expand and also the Pubmed and GeneCard links for more gene information</span></p></b>",
371 |
372 |
373 |
df11 = df1.head(value_gene).copy()
374 |
375 |
df11.index = (1 / df11.index) * 10000
376 |
sizes = df11.index.tolist()
377 |
378 |
df11.set_index('Genes', inplace=True)
379 |
380 |
df4 = df1.copy()
381 |
# print(df4.head(10))
382 |
df4["SIMILARITY"] = 'Similarity Score ' + df4.head(value_gene)["SIMILARITY"].round(2).astype(str)
383 |
384 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
385 |
# print(df4)
386 |
# # Use df.query to get a subset of df1 based on ids in df2
387 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
388 |
# # Use merge to join the two DataFrames on id
389 |
# result = pd.merge(subset, df2b, on='symbol2')
390 |
# print(result)
391 |
if value_gene <= df_len:
392 |
# Define the `text` column for labels and `href` column for links
393 |
df11['text'] = df11.index
394 |
df11['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
395 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df11['text']]
396 |
df11['href2'] = [f'' + c for c in df11['text']]
397 |
assert isinstance(df11, object)
398 |
df11['database'] = database_name
399 |
400 |
# df11['name'] = [c for c in result['Approved name']]
401 |
402 |
# Create the treemap using `px.treemap`
403 |
fig = px.treemap(df11, path=[df11['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
404 |
405 |
406 |
407 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
408 |
409 |
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
410 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
411 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
412 |
"<a href='%{customdata[0]}'>PubMed"
413 |
"</a><br><br><a href='%{customdata[2]}'>GeneCard"
414 |
415 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightPink"])
416 |
# # display the treemap in Streamlit
417 |
# with treemap2:
418 |
419 |
# st.pyplot(fig2)
420 |
st.plotly_chart(fig, use_container_width=True)
421 |
422 |
# st.caption(
423 |
# "Gene designation and database provided by KEGG homo sapien gene list:")
424 |
# st.caption("Gene information provided by GeneCards:")
425 |
426 |
"Human gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC):")
427 |
st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
428 |
st.caption("Gene information provided by GeneCards:")
429 |
430 |
csv = df1.head(value_gene).to_csv().encode('utf-8')
431 |
st.download_button(label=f"download top {value_gene} genes (csv)", data=csv,
432 |
file_name=f'{database_name}_genes.csv', mime='text/csv')
433 |
434 |
435 |
436 |
437 |
f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
438 |
439 |
# print()
440 |
# print("Human genes similar to " + str(query))
441 |
# --- Drug treemap section ---
# Filters `table` down to the words found in the KEGG drug list, then draws up
# to 100 of them as a Plotly treemap and offers the same rows as a CSV download.
# NOTE(review): several numbered lines in this view carry no content, so a few
# statements below look unterminated; confirm against the complete file.
df1 = table.copy()
442 |
# Reference list of drug names; its `drugs` column is what Word is matched against.
df2 = pd.read_csv('kegg_drug_list_lowercase.csv')
443 |
# Boolean mask: True for rows whose Word also occurs in df2.drugs.
m = df1.Word.isin(df2.drugs)
444 |
df1 = df1[m]
445 |
df1.rename(columns={'Word': 'Drugs'}, inplace=True)
446 |
# Row count after filtering; compared against value_drug further down.
df_len = len(df1)
447 |
# print(len(df1))
448 |
# df1["Human Gene"] = df1["Human Gene"].str.upper()
449 |
# print(df1.head(50))
450 |
# print()
451 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
452 |
# time.sleep(2)
453 |
# No slider is created here; the number of drugs shown is fixed by the rule below.
454 |
455 |
# value_drug is the number of rows in df1, capped at 100.
value_drug = min(df1.shape[0], 100)
456 |
457 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
458 |
# f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
459 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
460 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
461 |
# unsafe_allow_html=True)
462 |
463 |
464 |
# Section header text (HTML) naming the count, the query and the corpus.
# NOTE(review): the call that consumes these f-strings is not visible here;
# presumably st.markdown(..., unsafe_allow_html=True) as in the commented-out
# variant above - confirm.
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_drug} "
465 |
f"</span>Drugs contextually and semantically similar to "
466 |
f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
467 |
468 |
469 |
# Working copy limited to the rows that will be drawn.
df13 = df1.head(value_drug).copy()
470 |
471 |
# Tile size = 10000 / index label, so smaller labels get larger tiles.
# assumes the index holds positive numeric ranks - TODO confirm (a 0 label would give inf).
df13.index = (1 / df13.index) * 10000
472 |
sizes = df13.index.tolist()
473 |
474 |
# Move the drug names into the index now that the sizes have been captured.
df13.set_index('Drugs', inplace=True)
475 |
476 |
df6 = df1.copy()
477 |
# print(df4.head(10))
478 |
# Turns SIMILARITY into the label 'Similarity Score <x.xx>' for the first
# value_drug rows; rows past that are left NaN by index alignment.
# NOTE(review): df6 is not referenced again in the lines visible here;
# presumably it feeds hover_name in the px.treemap call, as in the
# commented-out variants of this section - confirm.
df6["SIMILARITY"] = 'Similarity Score ' + df6.head(value_drug)["SIMILARITY"].round(2).astype(str)
479 |
480 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
481 |
# print(df4)
482 |
# # Use df.query to get a subset of df1 based on ids in df2
483 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
484 |
# # Use merge to join the two DataFrames on id
485 |
# result = pd.merge(subset, df2b, on='symbol2')
486 |
# print(result)
487 |
# NOTE(review): value_drug is min(len(df1), 100) and df_len is len(df1), so this
# condition is always satisfied as written; with zero matches the branch still
# runs on an empty frame - confirm that is intended.
if value_drug <= df_len:
488 |
# Define the `text` column for labels and `href` column for links
489 |
# Reset the index
490 |
491 |
492 |
# Replace hyphens with spaces in the drug names.
# NOTE(review): 'Drugs' was moved into the index above, so this column access
# needs a reset_index() first; the "Reset the index" comment suggests that call
# sits on a line not visible here - confirm.
493 |
df13['Drugs'] = df13['Drugs'].str.replace('-', ' ')
494 |
495 |
# Put the cleaned 'Drugs' column back as the index.
496 |
df13.set_index('Drugs', inplace=True)
497 |
df13['text'] = df13.index
498 |
# 'href' is a URL-encoded PubMed search expression per term (%5B / %5D are
# square brackets): <database_name>[mh] NOT review[pt] AND english[la] AND
# hasabstract AND 1990:2022[dp] AND <term>.
# NOTE(review): no scheme/host prefix is visible in 'href' or 'href2'
# (f'' adds nothing); presumably the base URLs are missing from this view - confirm.
df13['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
499 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df13['text']]
500 |
df13['href2'] = [f'' + c for c in df13['text']]
501 |
# Always true (every Python value is an object); performs no real check.
assert isinstance(df13, object)
502 |
df13['database'] = database_name
503 |
504 |
# df11['name'] = [c for c in result['Approved name']]
505 |
506 |
# Create the treemap using `px.treemap`
# custom_data order matters: texttemplate below addresses entries by position
# (customdata[0] = href, [2] = href2, [3] = text).
# NOTE(review): the remaining arguments / closing parenthesis of this call are
# not visible here.
507 |
fig = px.treemap(df13, path=[df13['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
508 |
509 |
510 |
511 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
512 |
513 |
# Each tile shows the term plus PubMed and Wikipedia links built from custom_data.
# NOTE(review): the end of texttemplate and the closing parenthesis are not visible here.
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
514 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
515 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
516 |
"<a href='%{customdata[0]}'>PubMed"
517 |
"</a><br><br><a href='%{customdata[2]}'>Wikipedia"
518 |
519 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["Thistle"])
520 |
# # display the treemap in Streamlit
521 |
# with treemap2:
522 |
523 |
# st.pyplot(fig2)
524 |
st.plotly_chart(fig, use_container_width=True)
525 |
526 |
st.caption("Drug designation and database provided by KEGG:")
527 |
528 |
# The download holds the top value_drug rows of df1 (the filtered table),
# not the reshaped df13 used for plotting.
csv = df1.head(value_drug).to_csv().encode('utf-8')
529 |
st.download_button(label=f"download top {value_drug} drugs (csv)", data=csv,
530 |
file_name=f'{database_name}_drugs.csv', mime='text/csv')
531 |
532 |
533 |
534 |
535 |
# Warning text for the case where the requested count exceeds the matches.
# NOTE(review): the `else:` / st.warning( opener is not visible here, and given
# the always-true guard above this message looks unreachable - confirm.
f"This selection exceeds the number of similar drugs related to {query} within the {database_name} corpus, please choose a lower number")
537 |
538 |
539 |
# st.markdown("---")
540 |
# # print()
541 |
# # print("Human genes similar to " + str(query))
542 |
# df1 = table.copy()
543 |
# df2 = pd.read_csv('diseasesKegg.csv')
544 |
# m = df1.Word.isin(df2.disease)
545 |
# df1 = df1[m]
546 |
# df1.rename(columns={'Word': 'Disease'}, inplace=True)
547 |
# df_len = len(df1)
548 |
# # print(len(df1))
549 |
# # df1["Human Gene"] = df1["Human Gene"].str.upper()
550 |
# # print(df1.head(50))
551 |
# # print()
552 |
# # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
553 |
# # time.sleep(2)
554 |
# # Create the slider with increments of 5 up to 100
555 |
556 |
# # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
557 |
# value_disease = min(df1.shape[0], 100)
558 |
559 |
# # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
560 |
# # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
561 |
# # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
562 |
# # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
563 |
# # unsafe_allow_html=True)
564 |
565 |
# st.markdown(
566 |
# f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_disease} "
567 |
# f"</span>Diseases contextually and semantically similar to "
568 |
# f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
569 |
# unsafe_allow_html=True)
570 |
571 |
# df14 = df1.head(value_disease).copy()
572 |
573 |
# df14.index = (1 / df14.index) * 10000
574 |
# sizes = df14.index.tolist()
575 |
576 |
# df14.set_index('Disease', inplace=True)
577 |
578 |
# df7 = df1.copy()
579 |
# # print(df4.head(10))
580 |
# df7["SIMILARITY"] = 'Similarity Score ' + df7.head(value_disease)["SIMILARITY"].round(2).astype(str)
581 |
# df7.reset_index(inplace=True)
582 |
# # df4 = df4.rename(columns={'Protein': 'symbol2'})
583 |
# # print(df4)
584 |
# # # Use df.query to get a subset of df1 based on ids in df2
585 |
# # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
586 |
# # # Use merge to join the two DataFrames on id
587 |
# # result = pd.merge(subset, df2b, on='symbol2')
588 |
# # print(result)
589 |
# if value_disease <= df_len:
590 |
# # Define the `text` column for labels and `href` column for links
591 |
# # Reset the index
592 |
# df14.reset_index(inplace=True)
593 |
594 |
# # Replace hyphens with spaces in the 'text' column
595 |
# df14['Disease'] = df14['Disease'].str.replace('-', ' ')
596 |
597 |
# # Set the 'text' column back as the index
598 |
# df14.set_index('Disease', inplace=True)
599 |
# df14['text'] = df14.index
600 |
# df14['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
601 |
# '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df14['text']]
602 |
# df14['href2'] = [f'' + c for c in df14['text']]
603 |
# assert isinstance(df14, object)
604 |
# df14['database'] = database_name
605 |
606 |
# # df11['name'] = [c for c in result['Approved name']]
607 |
608 |
# # Create the treemap using `px.treemap`
609 |
# fig = px.treemap(df14, path=[df14['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
610 |
# hover_name=(df7.head(value_disease)['SIMILARITY']))
611 |
612 |
# fig.update(layout_coloraxis_showscale=False)
613 |
# fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
614 |
# fig.update_annotations(visible=False)
615 |
# fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
616 |
# hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
617 |
# texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
618 |
# "<a href='%{customdata[0]}'>PubMed"
619 |
# "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
620 |
# "</span></a>")
621 |
# fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["PaleGoldenRod"])
622 |
# # # display the treemap in Streamlit
623 |
# # with treemap2:
624 |
625 |
# # st.pyplot(fig2)
626 |
# st.plotly_chart(fig, use_container_width=True)
627 |
628 |
# st.caption("Disease designation and database provided by KEGG:")
629 |
630 |
# csv = df1.head(value_disease).to_csv().encode('utf-8')
631 |
# st.download_button(label=f"download top {value_disease} diseases (csv)", data=csv,
632 |
# file_name=f'{database_name}_disease.csv', mime='text/csv')
633 |
634 |
635 |
# else:
636 |
# st.warning(
637 |
# f"This selection exceeds the number of similar diseases related to {query} within the {database_name} corpus, please choose a lower number")
638 |
# st.markdown("---")
639 |
640 |
# st.markdown("---")
641 |
# # print()
642 |
# # print("Human genes similar to " + str(query))
643 |
# df1 = table.copy()
644 |
# df2 = pd.read_csv('pathwaysKegg.csv')
645 |
# m = df1.Word.isin(df2.pathway)
646 |
# df1 = df1[m]
647 |
# df1.rename(columns={'Word': 'Pathway'}, inplace=True)
648 |
# df_len = len(df1)
649 |
# # print(len(df1))
650 |
# # df1["Human Gene"] = df1["Human Gene"].str.upper()
651 |
# # print(df1.head(50))
652 |
# # print()
653 |
# # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
654 |
# # time.sleep(2)
655 |
# # Create the slider with increments of 5 up to 100
656 |
657 |
# # Remove the slider and set the value_compound to the minimum of the number of rows in the dataframe and 100
658 |
# value_pathway = min(df1.shape[0], 100)
659 |
660 |
# # st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
661 |
# # f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
662 |
# # f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
663 |
# # f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
664 |
# # unsafe_allow_html=True)
665 |
666 |
# st.markdown(
667 |
# f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_pathway} "
668 |
# f"</span>Pathways contextually and semantically similar to "
669 |
# f"<span style='color:red; font-style: italic;'>{query}:</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> database. Click on the squares to expand and the Pubmed and Wikipedia links for more compound information</span></p></b>",
670 |
# unsafe_allow_html=True)
671 |
672 |
# df16 = df1.head(value_pathway).copy()
673 |
674 |
# df16.index = (1 / df16.index) * 10000
675 |
# sizes = df16.index.tolist()
676 |
677 |
# df16.set_index('Pathway', inplace=True)
678 |
679 |
# df9 = df1.copy()
680 |
# # print(df4.head(10))
681 |
# df9["SIMILARITY"] = 'Similarity Score ' + df9.head(value_pathway)["SIMILARITY"].round(2).astype(str)
682 |
# df9.reset_index(inplace=True)
683 |
# # df4 = df4.rename(columns={'Protein': 'symbol2'})
684 |
# # print(df4)
685 |
# # # Use df.query to get a subset of df1 based on ids in df2
686 |
# # subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
687 |
# # # Use merge to join the two DataFrames on id
688 |
# # result = pd.merge(subset, df2b, on='symbol2')
689 |
# # print(result)
690 |
# if value_pathway <= df_len:
691 |
# # Define the `text` column for labels and `href` column for links
692 |
# # Reset the index
693 |
# df16.reset_index(inplace=True)
694 |
695 |
# # Replace hyphens with spaces in the 'text' column
696 |
# df16['Pathway'] = df16['Pathway'].str.replace('-', ' ')
697 |
698 |
# # Set the 'text' column back as the index
699 |
# df16.set_index('Pathway', inplace=True)
700 |
# df16['text'] = df16.index
701 |
# df16['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
702 |
# '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df16['text']]
703 |
# df16['href2'] = [f'' + c for c in df16['text']]
704 |
# assert isinstance(df16, object)
705 |
# df16['database'] = database_name
706 |
707 |
# # df11['name'] = [c for c in result['Approved name']]
708 |
709 |
# # Create the treemap using `px.treemap`
710 |
# fig = px.treemap(df16, path=[df16['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
711 |
# hover_name=(df9.head(value_pathway)['SIMILARITY']))
712 |
713 |
# fig.update(layout_coloraxis_showscale=False)
714 |
# fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
715 |
# fig.update_annotations(visible=False)
716 |
# fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
717 |
# hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
718 |
# texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
719 |
# "<a href='%{customdata[0]}'>PubMed"
720 |
# "</a><br><br><a href='%{customdata[2]}'>Wikipedia"
721 |
# "</span></a>")
722 |
# fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["FloralWhite"])
723 |
# # # display the treemap in Streamlit
724 |
# # with treemap2:
725 |
726 |
# # st.pyplot(fig2)
727 |
# st.plotly_chart(fig, use_container_width=True)
728 |
729 |
# st.caption("Pathway designation and database provided by KEGG:")
730 |
731 |
# csv = df1.head(value_pathway).to_csv().encode('utf-8')
732 |
# st.download_button(label=f"download top {value_pathway} pathways (csv)", data=csv,
733 |
# file_name=f'{database_name}_pathways.csv', mime='text/csv')
734 |
735 |
736 |
# else:
737 |
# st.warning(
738 |
# f"This selection exceeds the number of similar pathways related to {query} within the {database_name} corpus, please choose a lower number")
739 |
# st.markdown("---")
740 |
741 |
742 |
# print()
743 |
# print("Human genes similar to " + str(query))
744 |
# --- Phytochemical treemap section ---
# Filters `table` down to the words found in phytochemicals.csv, then draws up
# to 100 of them as a Plotly treemap and offers the same rows as a CSV download.
# NOTE(review): several numbered lines in this view carry no content, so a few
# statements below look unterminated; confirm against the complete file.
df1 = table.copy()
745 |
# Reference list of phytochemical names; its `phyto` column is what Word is matched against.
df2 = pd.read_csv('phytochemicals.csv')
746 |
# Boolean mask: True for rows whose Word also occurs in df2.phyto.
m = df1.Word.isin(df2.phyto)
747 |
df1 = df1[m]
748 |
df1.rename(columns={'Word': 'Phytochemical'}, inplace=True)
749 |
# Row count after filtering; compared against value_phyto further down.
df_len = len(df1)
750 |
# print(len(df1))
751 |
# df1["Human Gene"] = df1["Human Gene"].str.upper()
752 |
# print(df1.head(50))
753 |
# print()
754 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
755 |
# time.sleep(2)
756 |
# No slider is created here; the number of phytochemicals shown is fixed by the rule below.
757 |
758 |
# value_phyto is the number of rows in df1, capped at 100.
759 |
value_phyto = min(df1.shape[0], 100)
760 |
761 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
762 |
# f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
763 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
764 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
765 |
# unsafe_allow_html=True)
766 |
767 |
768 |
# Section header text (HTML) naming the count, the query and the corpus.
# NOTE(review): the call that consumes these f-strings is not visible here;
# presumably st.markdown(..., unsafe_allow_html=True) as in the commented-out
# variant above - confirm.
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_phyto} "
769 |
f"</span>Phytochemicals contextually and semantically similar to "
770 |
f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
771 |
f"Click on the squares to expand and also the Pubmed and Wikipedia links for more compound information</span></p></b>",
772 |
773 |
774 |
# Working copy limited to the rows that will be drawn.
df15 = df1.head(value_phyto).copy()
775 |
776 |
# Tile size = 10000 / index label, so smaller labels get larger tiles.
# assumes the index holds positive numeric ranks - TODO confirm (a 0 label would give inf).
df15.index = (1 / df15.index) * 10000
777 |
sizes = df15.index.tolist()
778 |
779 |
# Move the phytochemical names into the index now that the sizes have been captured.
df15.set_index('Phytochemical', inplace=True)
780 |
781 |
df8 = df1.copy()
782 |
# print(df4.head(10))
783 |
# Turns SIMILARITY into the label 'Similarity Score <x.xx>' for the first
# value_phyto rows; rows past that are left NaN by index alignment.
# NOTE(review): df8 is not referenced again in the lines visible here;
# presumably it feeds hover_name in the px.treemap call, as in the
# commented-out variants of this section - confirm.
df8["SIMILARITY"] = 'Similarity Score ' + df8.head(value_phyto)["SIMILARITY"].round(2).astype(str)
784 |
785 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
786 |
# print(df4)
787 |
# # Use df.query to get a subset of df1 based on ids in df2
788 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
789 |
# # Use merge to join the two DataFrames on id
790 |
# result = pd.merge(subset, df2b, on='symbol2')
791 |
# print(result)
792 |
# NOTE(review): value_phyto is min(len(df1), 100) and df_len is len(df1), so this
# condition is always satisfied as written; with zero matches the branch still
# runs on an empty frame - confirm that is intended.
if value_phyto <= df_len:
793 |
# Define the `text` column for labels and `href` column for links
794 |
# Reset the index
795 |
796 |
797 |
# Replace hyphens with spaces in the phytochemical names.
# NOTE(review): 'Phytochemical' was moved into the index above, so this column
# access needs a reset_index() first; the "Reset the index" comment suggests
# that call sits on a line not visible here - confirm.
798 |
df15['Phytochemical'] = df15['Phytochemical'].str.replace('-', ' ')
799 |
800 |
# Put the cleaned 'Phytochemical' column back as the index.
801 |
df15.set_index('Phytochemical', inplace=True)
802 |
df15['text'] = df15.index
803 |
# 'href' is a URL-encoded PubMed search expression per term (%5B / %5D are
# square brackets): <database_name>[mh] NOT review[pt] AND english[la] AND
# hasabstract AND 1990:2022[dp] AND <term>.
# NOTE(review): no scheme/host prefix is visible in 'href' or 'href2'
# (f'' adds nothing); presumably the base URLs are missing from this view - confirm.
df15['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
804 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df15['text']]
805 |
df15['href2'] = [f'' + c for c in df15['text']]
806 |
# Always true (every Python value is an object); performs no real check.
assert isinstance(df15, object)
807 |
df15['database'] = database_name
808 |
809 |
# df11['name'] = [c for c in result['Approved name']]
810 |
811 |
# Create the treemap using `px.treemap`
# custom_data order matters: texttemplate below addresses entries by position
# (customdata[0] = href, [2] = href2, [3] = text).
# NOTE(review): the remaining arguments / closing parenthesis of this call are
# not visible here.
812 |
fig = px.treemap(df15, path=[df15['text']], values=sizes, custom_data=['href', 'database', 'href2', 'text'],
813 |
814 |
815 |
816 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
817 |
818 |
# Each tile shows the term plus PubMed and Wikipedia links built from custom_data.
# NOTE(review): the end of texttemplate and the closing parenthesis are not visible here.
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
819 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
820 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
821 |
"<a href='%{customdata[0]}'>PubMed"
822 |
"</a><br><br><a href='%{customdata[2]}'>Wikipedia"
823 |
824 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightSeaGreen"])
825 |
# # display the treemap in Streamlit
826 |
# with treemap2:
827 |
828 |
# st.pyplot(fig2)
829 |
st.plotly_chart(fig, use_container_width=True)
830 |
831 |
st.caption("Phytochemical designation and database provided by PhytoHub:")
832 |
833 |
# The download holds the top value_phyto rows of df1 (the filtered table),
# not the reshaped df15 used for plotting.
csv = df1.head(value_phyto).to_csv().encode('utf-8')
834 |
st.download_button(label=f"download top {value_phyto} phytochemicals (csv)", data=csv,
835 |
file_name=f'{database_name}_phytochemicals.csv', mime='text/csv')
836 |
837 |
838 |
839 |
840 |
# Warning text for the case where the requested count exceeds the matches.
# NOTE(review): the user-facing text misspells "phytochemicals" as
# "pythochemicals"; the `else:` / st.warning( opener is not visible here, and
# given the always-true guard above this message looks unreachable - confirm.
f"This selection exceeds the number of similar pythochemicals related to {query} within the {database_name} corpus, please choose a lower number")
841 |
842 |
843 |
# print()
844 |
# print("Human genes similar to " + str(query))
845 |
# --- Compound treemap section ---
# Filters `table` down to the words found in the KEGG compound list, then draws
# up to 100 of them as a Plotly treemap and offers the same rows as a CSV download.
# NOTE(review): several numbered lines in this view carry no content, so a few
# statements below look unterminated; confirm against the complete file.
df1 = table.copy()
846 |
# Reference list of compound names; its `compound` column is what Word is matched against.
df2 = pd.read_csv('kegg_compounds_lowercase.csv')
847 |
# Boolean mask: True for rows whose Word also occurs in df2.compound.
m = df1.Word.isin(df2.compound)
848 |
df1 = df1[m]
849 |
df1.rename(columns={'Word': 'Compounds'}, inplace=True)
850 |
# Row count after filtering; compared against value_compound further down.
df_len = len(df1)
851 |
# df1["Human Gene"] = df1["Human Gene"].str.upper()
852 |
# print(df1.head(50))
853 |
# print()
854 |
# df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
855 |
# time.sleep(2)
856 |
# No slider is created here; the number of compounds shown is fixed by the rule below.
857 |
858 |
# value_compound is the number of rows in df1, capped at 100.
859 |
value_compound = min(df1.shape[0], 100)
860 |
861 |
# st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Visualize "
862 |
# f"<span style='color:red; font-style: italic;'>KEGG compounds</span> contextually "
863 |
# f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
864 |
# f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
865 |
# unsafe_allow_html=True)
866 |
867 |
868 |
# Section header text (HTML) naming the count, the query and the corpus.
# NOTE(review): the call that consumes these f-strings is not visible here;
# presumably st.markdown(..., unsafe_allow_html=True) as in the commented-out
# variant above - confirm.
f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_compound} "
869 |
f"</span>Compounds contextually and semantically similar to "
870 |
f"<span style='color:red; font-style: italic;'>{query}</span> within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus. "
871 |
f"Click on the squares to expand and the Pubmed, Wikipedia, and KEGG links for more compound information (may take time to load)</span></p></b>",
872 |
873 |
874 |
# Working copy limited to the rows that will be drawn.
df12 = df1.head(value_compound).copy()
875 |
876 |
# Tile size = 10000 / index label, so smaller labels get larger tiles.
# assumes the index holds positive numeric ranks - TODO confirm (a 0 label would give inf).
df12.index = (1 / df12.index) * 10000
877 |
sizes = df12.index.tolist()
878 |
879 |
# Move the compound names into the index now that the sizes have been captured.
df12.set_index('Compounds', inplace=True)
880 |
881 |
df5 = df1.copy()
882 |
# print(df4.head(10))
883 |
# Turns SIMILARITY into the label 'Similarity Score <x.xx>' for the first
# value_compound rows; rows past that are left NaN by index alignment.
# NOTE(review): df5 is not referenced again in the lines visible here;
# presumably it feeds hover_name in the px.treemap call, as in the
# commented-out variants of this section - confirm.
df5["SIMILARITY"] = 'Similarity Score ' + df5.head(value_compound)["SIMILARITY"].round(2).astype(str)
884 |
885 |
# df4 = df4.rename(columns={'Protein': 'symbol2'})
886 |
# print(df4)
887 |
# # Use df.query to get a subset of df1 based on ids in df2
888 |
# subset = df4.head(value_gene).query('symbol2 in @df2b.symbol2')
889 |
# # Use merge to join the two DataFrames on id
890 |
# result = pd.merge(subset, df2b, on='symbol2')
891 |
# print(result)
892 |
893 |
# NOTE(review): value_compound is min(len(df1), 100) and df_len is len(df1), so
# this condition is always satisfied as written; with zero matches the branch
# still runs on an empty frame - confirm that is intended.
if value_compound <= df_len:
894 |
# Define the `text` column for labels and `href` column for links
895 |
# Reset the index
896 |
897 |
898 |
# Replace hyphens with spaces in the compound names.
# NOTE(review): 'Compounds' was moved into the index above, so this column
# access needs a reset_index() first; the "Reset the index" comment suggests
# that call sits on a line not visible here - confirm.
899 |
df12['Compounds'] = df12['Compounds'].str.replace('-', ' ')
900 |
901 |
# Put the cleaned 'Compounds' column back as the index.
902 |
df12.set_index('Compounds', inplace=True)
903 |
df12['text'] = df12.index
904 |
# 'href' is a URL-encoded PubMed search expression per term (%5B / %5D are
# square brackets): <database_name>[mh] NOT review[pt] AND english[la] AND
# hasabstract AND 1990:2022[dp] AND <term>.
# NOTE(review): no scheme/host prefix is visible in 'href', 'href2' or 'href3'
# (f'' adds nothing); presumably the base URLs are missing from this view - confirm.
df12['href'] = [f'{database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
905 |
'+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df12['text']]
906 |
df12['href2'] = [f'' + c for c in df12['text']]
907 |
# 'href3' holds one entry per compound id and is used for the KEGG link below.
# NOTE(review): the iterable that supplies compound_id is on a line not visible
# here, so where the ids come from cannot be determined from this view.
df12['href3'] = [f'{compound_id}' for compound_id in
908 |
909 |
# Always true (every Python value is an object); performs no real check.
assert isinstance(df12, object)
910 |
df12['database'] = database_name
911 |
912 |
# df11['name'] = [c for c in result['Approved name']]
913 |
914 |
# Create the treemap using `px.treemap`
# custom_data order matters: texttemplate below addresses entries by position
# (customdata[0] = href, [2] = href2, [3] = text, [4] = href3).
# NOTE(review): the remaining arguments / closing parenthesis of this call are
# not visible here.
915 |
fig = px.treemap(df12, path=[df12['text']], values=sizes,
916 |
custom_data=['href', 'database', 'href2', 'text', 'href3'],
917 |
918 |
919 |
920 |
fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
921 |
922 |
# Each tile shows the term plus PubMed, Wikipedia and KEGG links built from custom_data.
# NOTE(review): the end of texttemplate and the closing parenthesis are not visible here.
fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
923 |
hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
924 |
texttemplate="<span style='font-family: Arial; font-size: 20px;'>%{customdata[3]}<br><br>"
925 |
"<a href='%{customdata[0]}'>PubMed"
926 |
"</a><br><br><a href='%{customdata[2]}'>Wikipedia"
927 |
"</a><br><br><a href='%{customdata[4]}'>KEGG Compound Page"
928 |
929 |
930 |
fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["LightYellow"])
931 |
# # display the treemap in Streamlit
932 |
# with treemap2:
933 |
934 |
# st.pyplot(fig2)
935 |
st.plotly_chart(fig, use_container_width=True)
936 |
937 |
st.caption("Compound designation and database provided by KEGG:")
938 |
939 |
# The download holds the top value_compound rows of df1 (the filtered table),
# not the reshaped df12 used for plotting.
csv = df1.head(value_compound).to_csv().encode('utf-8')
940 |
st.download_button(label=f"download top {value_compound} compounds (csv)", data=csv,
941 |
file_name=f'{database_name}_compounds.csv', mime='text/csv')
942 |
943 |
944 |
945 |
946 |
# Warning text for the case where the requested count exceeds the matches.
# NOTE(review): the user-facing text says "proteins" although this section lists
# compounds (looks like a copy-paste leftover); the `else:` / st.warning( opener
# is not visible here, and given the always-true guard above this message looks
# unreachable - confirm.
f"This selection exceeds the number of similar proteins related to {query} within the {database_name} corpus, please choose a lower number")
947 |
948 |
949 |
# import os
950 |
951 |
# from datasets import Dataset
952 |
953 |
# # Check if the comments directory exists
954 |
# if os.path.exists('comments'):
955 |
# # Load the dataset from disk
956 |
# dataset = Dataset.load_from_disk('comments')
957 |
# else:
958 |
# # Create a new dataset
959 |
# dataset = Dataset.from_dict({'id': [], 'text': []})
960 |
961 |
# def save_comment(comment):
962 |
# # Check if the dataset exists
963 |
# if os.path.exists('comments'):
964 |
# dataset = Dataset.load_from_disk('comments')
965 |
# else:
966 |
# dataset = Dataset.from_dict({'id': [], 'text': []})
967 |
968 |
# # Append the new comment to the dataset
969 |
# new_comment = {'id': len(dataset), 'text': comment}
970 |
# dataset = dataset.concatenate(Dataset.from_dict(new_comment))
971 |
972 |
# # Save the dataset to disk
973 |
# dataset.save_to_disk('comments')
974 |
975 |
# print('Comment saved to dataset.')
976 |
977 |
# st.title("Abstractalytics Web App")
978 |
# st.write("We appreciate your feedback!")
979 |
980 |
# user_comment = st.text_area("Please send us your anonymous remarks/suggestions about the Abstractalytics Web App: "
981 |
# "(app will pause while we save your comments)")
982 |
983 |
# if st.button("Submit"):
984 |
# if user_comment:
985 |
# save_comment(user_comment)
986 |
# st.success("Your comment has been saved. Thank you for your feedback!")
987 |
# else:
988 |
# st.warning("Please enter a comment before submitting.")
989 |
990 |
# # Load the comments dataset from disk
991 |
# if os.path.exists('comments'):
992 |
# dataset = Dataset.load_from_disk('comments')
993 |
# else:
994 |
# dataset = Dataset.from_dict({'id': [], 'text': []})
995 |
996 |
# # Access the text column of the dataset
997 |
# comments = dataset['text']
998 |
999 |
# # Define the password
1000 |
# PASSWORD = 'ram100pass'
1001 |
1002 |
# # Prompt the user for the password
1003 |
# password = st.text_input('Password:', type='password')
1004 |
1005 |
# # Display the comments if the password is correct
1006 |
# if password == PASSWORD:
1007 |
# st.title('Comments')
1008 |
# for comment in comments:
1009 |
# st.write(comment)
1010 |
# else:
1011 |
# st.warning('Incorrect password')
1012 |
1013 |
1014 |
1015 |
1016 |
1017 |
st.subheader("Cancer-related videos")
1018 |
if query: