# streamlit_app.py
import streamlit as st
import pandas as pd
pd.options.mode.chained_assignment = None # default='warn'
import numpy as np
from io import BytesIO
import os
import sys
# extend sys.path with the local ./src directory so modules such as agstyler import as top-level names
ROOT = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(ROOT, "./src/"))
from agstyler import PINLEFT, PRECISION_TWO, draw_grid
# Page-level configuration (browser tab title, icon, wide layout, sidebar open).
# In this file it is issued ahead of every other st.* call.
st.set_page_config(
page_title="Ligand Discovery 1: Fragment-Protein interactions in Chemical Proteomics Screening",
page_icon=":home:",
layout="wide", # "centered",
initial_sidebar_state="expanded"
)
# Raw-HTML markdown injection. The literal below holds only a newline, so
# nothing visible is emitted.
# NOTE(review): presumably custom CSS/HTML once lived here and was stripped — confirm.
st.markdown("""
""", unsafe_allow_html=True)
# Same pattern: the style string holds only a newline, so this call currently
# injects nothing either.
hide_streamlit_style = """
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# Protein lookup table (tab-separated). Only the 'UniProtID' and 'Description'
# columns are read here.
pIdDf = pd.read_csv(os.path.join(ROOT, "./data/general/proteinNames4.tsv"), sep="\t")
# pId and pIdDes are parallel arrays taken from the same rows: position i of
# pIdDes is the display text for accession pId[i].
pId = pIdDf['UniProtID'].values
pIdDes = pIdDf['Description'].values
def applyFilters(df, pFil, pAdjFil, hitFil):
    """Return the rows of *df* that pass the chosen significance and hit-class filters.

    Every selector is a UI label; the label ``'no filter'`` skips that stage.
    The numeric cut-offs equal -log10 of the labelled level (for example
    1.30103 == -log10(0.05)), so the ``ml10p`` / ``ml10adjP`` columns
    presumably store -log10 transformed values -- confirm against the data.

    Args:
        df: Table with ``ml10p``, ``ml10adjP`` and ``mdfClass`` columns
            (a column is only touched when its filter is active).
        pFil: ``'< 0.05'`` keeps ``ml10p > 1.30103``; any other label
            except ``'no filter'`` keeps ``ml10p > 2``.
        pAdjFil: ``'< 0.05'`` / ``'< 0.1'`` keep ``ml10adjP`` above
            1.30103 / 1; any other label except ``'no filter'`` keeps
            ``ml10adjP > 0.60206``.
        hitFil: ``'Low'`` and ``'Low (hits)'`` keep ``mdfClass >= 1``,
            ``'Medium (hits)'`` keeps ``mdfClass >= 2``; any other label
            except ``'no filter'`` keeps ``mdfClass == 3``.

    Returns:
        The filtered frame (the input object itself when no filter is active).
    """
    skip = 'no filter'

    if pFil != skip:
        p_cutoff = 1.30103 if pFil == '< 0.05' else 2
        df = df[df['ml10p'] > p_cutoff]

    if pAdjFil != skip:
        # Labels missing from the table fall back to the loosest cut-off.
        adj_cutoff = {'< 0.05': 1.30103, '< 0.1': 1}.get(pAdjFil, 0.60206)
        df = df[df['ml10adjP'] > adj_cutoff]

    if hitFil != skip:
        floor_by_label = {'Low': 1, 'Low (hits)': 1, 'Medium (hits)': 2}
        class_floor = floor_by_label.get(hitFil)
        if class_floor is None:
            # Every remaining label selects the top class only.
            df = df[df['mdfClass'] == 3]
        else:
            df = df[df['mdfClass'] >= class_floor]

    return df
def getVarText(df, pid=None):
    """Summarise a best-first hit table for the status sentence shown to the user.

    Unlike the previous version this does not overwrite ``df.index``; the
    rank is derived from row position, so the caller's frame is left intact.

    Args:
        df: Hit table already sorted best-first. Needs ``geneName`` and
            ``accession`` columns when it has at least one row.
        pid: Accession whose rank is looked up. When omitted, the
            module-level ``myPid`` (the protein selected in the sidebar)
            is used, which is what existing callers rely on.

    Returns:
        ``[bestProt, numProtHits, protRank, varText1, varText2]`` where
        ``bestProt`` is the gene name of the first row (``"No "`` for an
        empty table), ``numProtHits`` is the row count, and ``protRank`` is
        the 1-based position of the first row matching the accession, or
        ``""`` when it is absent. ``varText1`` / ``varText2`` are the
        sentence fragments that go with those values.
    """
    num_hits = len(df.index)
    if num_hits == 0:
        return ["No ", 0, "", "is not a hit", ""]

    # myPid is only resolved when no explicit accession was supplied.
    target = myPid if pid is None else pid
    best_prot = df["geneName"].values[0]

    # Row positions (0-based) whose accession matches; the first one is the rank.
    matches = np.flatnonzero((df["accession"] == target).values)
    if matches.size > 0:
        return [best_prot, num_hits, int(matches[0]) + 1, "hit rank is", "is best"]
    return [best_prot, num_hits, "", "is not a hit", "protein is best"]
st.sidebar.title("Ligand Discovery 1: Fragment-Protein Interactions")
st.title("Chemical Proteomics Screening")
help_input3='''
use **:blue[UniProt Accession]**, Short Gene Name(s) or Protein Description to search\n
**Tip**:\n
To change selected protein, **:red[NO]** need to select whole existing term, delete and type new.\n
:blue[Just start to type new protein, old text will be automatically cleared]'''
# The select box works on row positions into pIdDes and shows the description
# text for each; the chosen position is mapped back to an accession below.
# NOTE(review): index=1706 is a hard-coded default row and assumes the protein
# table has at least 1707 rows — confirm.
pIdIndex = st.sidebar.selectbox(
    label="Select Protein",
    help=help_input3,
    options=range(len(pIdDes)),
    format_func=lambda x: pIdDes[x],
    index=1706,
)
myPid = pId[pIdIndex]


def _load_split_tsv(prefix, suffixes):
    """Load a table stored as numbered tab-separated parts and stack them.

    The part ``prefix + "00"`` carries the header row. Every part named by
    *suffixes* is read without a header and given the same column labels.
    All parts are concatenated once, instead of growing the frame inside the
    loop, which would re-copy the accumulated rows for every part.

    Args:
        prefix: Path relative to ROOT, without the two-digit suffix.
        suffixes: Suffixes of the header-less continuation parts, in order.

    Returns:
        One DataFrame with a fresh 0..n-1 index.
    """
    head = pd.read_csv(os.path.join(ROOT, prefix + "00"), sep="\t")
    parts = [head]
    for suffix in suffixes:
        part = pd.read_csv(os.path.join(ROOT, prefix + suffix), header=None, sep="\t")
        part.columns = head.columns
        parts.append(part)
    return pd.concat(parts, ignore_index=True)


# NOTE(review): both tables are re-read from disk on every script run;
# Streamlit's data caching may be worth adding if the installed version
# supports it — confirm.
# Gen1 screening table: parts 00 (with header) through 13.
finalScreenSuffixes = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13"]
intDfOri = _load_split_tsv("./data/general/finalScreenTSV", finalScreenSuffixes)
# Gen2 table used by the "Second generation fragments" sections: parts 00 through 02.
finalFpSuffixes = ["01", "02"]
fpDf = _load_split_tsv("./data/general/finalFpTSV", finalFpSuffixes)
# Rows of the combined screening table that belong to the protein chosen in the sidebar.
intDf = intDfOri[intDfOri["accession"]==myPid]
if len(intDf) == 0:
# The selected accession has no rows in the screening table: say so in the sidebar.
st.sidebar.write("We did **:red[not]** detect selected protein interacting with any fragment in our screen, try another protein")
else:
# NOTE(review): indentation is not visible in this view. The statements that
# follow read names first assigned here (selectedGeneName, numLigaHits, ...),
# so they presumably all belong to this else branch — confirm.
selectedGeneName = intDf["geneName"].values[0]
# Reference hit set computed with fixed labels ('< 0.05', '< 0.25',
# 'Medium (hits)'); it does not depend on the filter widgets.
tempDf = applyFilters(intDf, '< 0.05', '< 0.25', 'Medium (hits)')
if (len(tempDf.index)) > 0:
# Order: ascending protHits, ties broken by descending l2fc; row 0 is reported as the best fragment.
tempDf = tempDf.sort_values(by=['protHits', 'l2fc'], ascending=[True, False])
bestFrag = tempDf["fragId"].values[0]
# NOTE(review): top5Frags is assigned only in this branch and is not read anywhere else in the visible file.
top5Frags = tempDf["fragId"].values[0:5]
numLigaHits = len(tempDf.index) # numLigaHits is already present in base input table
varText3 = "is best"
else:
# No reference hits: placeholder values for the summary sentence.
bestFrag = "No"
varText3 = ""
numLigaHits = 0
######## Screening Protein Centric View ############
# Header and summary sentence for the selected protein.
st.write("**Selected Protein**: ", pIdDes[pIdIndex])
st.markdown("""---""")
st.subheader(f"First generation fragments (Gen1) that enrich **:blue[{selectedGeneName}]** over background")
# numInt counts every row for this protein before any filter is applied.
numInt = len(intDf.index)
st.write(f"**:blue[{numInt}]** (out of 407 screened) Gen1 fragments enrich **{selectedGeneName}**. **:blue[{numLigaHits}]**/{numInt} fragments are labelled as **hits** by applying **medium** filter Set **(:blue[fS])**. **:blue[{bestFrag}]** {varText3} **hit**.")
# Promiscuity banner when more than 10% of 407 are reference hits.
# NOTE(review): the message calls this a hit/enriched ratio, but the
# denominator is the constant 407 rather than numInt — confirm which is intended.
if (numLigaHits/407)>0.1:
hitRatio = np.round((numLigaHits/407)*100, 1)
st.write(f"**:blue[{selectedGeneName}]** is a **:red[promiscuous]** protein (**hit**/enriched ratio is **:red[{hitRatio}]**%).")
# Six equal columns; only the first three hold filter widgets.
col1, col2, col3, colX, colY, colZ = st.columns(6)
with col1:
# The first option of each select box is its default, so the defaults match
# the labels used for the reference hit set.
pFilter = st.selectbox(label = "*P* Value", help = "Select threshold for signifiance", options = ('< 0.05', 'no filter', '< 0.01'))
with col2:
pAdjFilter = st.selectbox(label = "adjusted *P* Value", help = "Select threshold for signifiance", options = ('< 0.25', '< 0.1', 'no filter', '< 0.05'))
with col3:
help_input='''
**:blue[0]**. no filter\n
**:blue[1]**. Low Confidence: Fc > 1, Median > 1, p < 0.05, adj.p < 0.25, Rank < 500\n
**:blue[2]**. Medium confidence ('**:blue[hits]**'): Fc > 2.3, Median > 1, p < 0.05, adj.p < 0.25, Rank < 500\n
**:blue[3]**. High Confidence (also '**:blue[hits]**'): Fc > 2.3, Median > 2.3, p < 0.01, adj.p < 0.1, Rank < 500'''
mdfClass = st.selectbox(label = "filter Set (**:blue[fS]**)", help = help_input, options = ('Medium (hits)', 'no filter', 'Low', 'High (hits)'))
# When the reference filter found nothing, the widget values are ignored and
# all rows are shown; otherwise the widget selections are applied.
# NOTE(review): selections stricter than the reference labels can leave intDf
# empty; the fragment select box fed from intDf would then have no options — confirm handling.
if len(tempDf.index) == 0:
st.write("**:red[No]** data to display with selected filters. Applied **:blue[no filter]**")
intDf = applyFilters(intDf, 'no filter', 'no filter', 'no filter')
else:
intDf = applyFilters(intDf, pFilter, pAdjFilter, mdfClass)
del tempDf
# Same ordering as the reference set: ascending protHits, then descending l2fc.
intDf = intDf.sort_values(by=['protHits', 'l2fc'], ascending=[True, False])
# Left: fragment table. Right: per-protein image looked up by accession.
col4, col5 = st.columns(2)
with col4:
# Maps column name -> (header text, column options) for draw_grid.
formatter = {
'fragId': ('Fragment', {**PINLEFT, 'width': 10}),
'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 15}),
'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 25}),
'protHits': ('# Protein Hits', {'width': 15}),
'mdfClass': ('fS', {'width': 10})
}
data = draw_grid(intDf, formatter=formatter, fit_columns=True, selection='none', max_height=340)
with col5:
st.image(os.path.join(ROOT, "./assets/proteinCentric/") + myPid + ".png")
# Fragment picker, limited to the fragments left in the filtered protein table.
# NOTE(review): if intDf is empty the select box has no options and presumably
# returns None, which would break the string concatenations below — confirm.
fragId = st.sidebar.selectbox(label = "Select Gen1 Fragment", options = intDf["fragId"])
# All screening rows (any protein) for the chosen fragment.
intDf2 = intDfOri[intDfOri["fragId"]==fragId]
############ Screening Fragment Centric ###############################
st.subheader(f"Proteins enriched by **:blue[{fragId}]**")
# Same selection as intDf2; this copy becomes the reference hit set for the fragment.
tempDf2 = intDfOri[intDfOri["fragId"]==fragId]
numProtDetected = len(tempDf2.index)
# Reference hits use the fixed labels, independent of the filter widgets.
tempDf2 = applyFilters(tempDf2, '< 0.05', '< 0.25', 'Medium (hits)')
# Order: ascending ligHits, ties broken by descending l2fc.
tempDf2 = tempDf2.sort_values(by=['ligHits', 'l2fc'], ascending=[True, False])
# protRank is the selected protein's 1-based position in that ordering, or "" when it is not among the hits.
[bestProt, numProtHits, protRank, varText, varText2] = getVarText(tempDf2)
# Displayed table: unfiltered when there are no reference hits, otherwise the widget selections apply.
if len(tempDf2.index) == 0:
intDf3 = applyFilters(intDf2, 'no filter', 'no filter', 'no filter')
else:
intDf3 = applyFilters(intDf2, pFilter, pAdjFilter, mdfClass)
intDf3 = intDf3.sort_values(by=['ligHits', 'l2fc'], ascending=[True, False])
# Fragment image shown in the sidebar, looked up by fragment id.
st.sidebar.image(os.path.join(ROOT, "./assets/fragFiguresSingle/") + fragId + ".png")
st.write(f"**:blue[{numProtDetected}]** proteins were enriched by fragment **{fragId}** (Fc compared to **CRF** control). **:blue[{numProtHits}]** of those proteins were labelled as **hits** by applying **medium** filter Set **(:blue[fS])**. **:blue[{bestProt}]** {varText2} **hit**. **:blue[{selectedGeneName}]** {varText} **:blue[{protRank}]**.")
# Promiscuity banner when more than 5% of the detected proteins are reference hits.
# NOTE(review): divides by numProtDetected, which is zero if the fragment has no rows — confirm this cannot happen.
if (numProtHits/numProtDetected)>0.05:
fragHitRatio = np.round((numProtHits/numProtDetected)*100, 1)
st.write(f"**:blue[{fragId}]** is **:red[promiscuous]** fragment (**hit**/enriched ratio is **:red[{fragHitRatio}]**%).")
# Left: volcano plot image for the fragment. Right: protein table.
col6, col7 = st.columns(2)
with col6:
st.image(os.path.join(ROOT, "./assets/ligandVolcanoPlots/") + fragId + ".png")
with col7:
if len(tempDf2.index) == 0:
st.write("**:red[No]** data to display with selected filters. Applied **:blue[no filter]**")
# Maps column name -> (header text, column options) for draw_grid.
formatter = {
'accession': ('Protein', {**PINLEFT, 'width': 15}),
'geneName': ('Gene', {**PINLEFT, 'width': 15}),
'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 15}),
'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 25}),
'ligHits': ('# Fragment Hits', {'width': 15}),
'mdfClass': ('fS', {'width': 10})
}
data = draw_grid(
intDf3, formatter=formatter, fit_columns=True, selection='none', max_height=340)
# protRank is "" (a str) when the protein is not a hit, so the numeric test is skipped in that case.
# NOTE(review): `< 5` accepts ranks 1-4 only, while the message says "top 5".
# NOTE(review): the message also states the fragment is in the protein's top 5
# fragment hits, but no check of fragId against the protein's fragment ranking
# is visible here — confirm.
if not isinstance(protRank, str):
if protRank < 5:
st.subheader(f"**:blue[{fragId}-{selectedGeneName}]** interaction: :first_place_medal:")
st.write(f"**:blue[{fragId}]** is in top 5 **Fragment hits** for **{selectedGeneName}**. **:blue[{selectedGeneName}]** is in top 5 **Protein hits** for **{fragId}**.")
del tempDf2
############# Fingerprinting / Elaborates Data ######################
# Gen1 fragment ids for which the Gen2 sections below are shown.
gen2List = ["C027", "C028", "C044", "C046", "C064", "C115", "C127", "C160", "C179", "C186", "C197", "C219", "C240", "C270", "C275", "C303", "C310", "C320", "C378", "C391"]
if fragId in gen2List:
st.markdown("""---""")
# st.sidebar.markdown("""---""")
############ Elaborates Protein Centric View ##########################
st.subheader(f"Second generation fragments (Gen2) of **:blue[{fragId}]** that compete **:blue[{selectedGeneName}]**")
# Gen2 rows whose gen1Lig column equals the selected Gen1 fragment.
gen1Df = fpDf[fpDf["gen1Lig"]==fragId]
# Number of distinct Gen2 fragment ids among those rows.
numGen2Ligs = len(np.unique(gen1Df['fragId']))
# Hits for the selected protein: mdfClass of at least 1, lowest l2fc first.
temp4Df = gen1Df[gen1Df["accession"]==myPid]
temp4Df = temp4Df[temp4Df["mdfClass"]>=1]
# temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
sidebarList1 = temp4Df["fragId"]
# NOTE(review): bestGen2Lig and varText5 are referenced only by the commented-out text below.
if len(temp4Df.index)>0:
bestGen2Lig = temp4Df['fragId'].values[0]
varText5 = "is best Gen2 fragment hit."
else:
bestGen2Lig = ""
varText5 = ""
st.write(f"**:blue[{numGen2Ligs}]** Gen2 fragments were screened in **competition** experiments against Gen1 fragment **{fragId}**. **:blue[{len(temp4Df.index)}]**/{numGen2Ligs} Gen2 fragments of **{fragId}** pass **low** filter Set (**:blue[fS2]**).")
# **:blue[{bestGen2Lig}]** {varText5}")
# **compete** **:blue[{selectedGeneName}]** after applying
# Maps column name -> (header text, column options) for both grids below.
formatter = {
'fragId': ('Gen2', {**PINLEFT, 'width': 10}),
'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 10}),
'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 20}),
'protHits': ('# Gen2 Protein Hits', {'width': 15}),
'mdfClass': ('fS2', {'width': 10})
}
col10, col11 = st.columns(2)
with col10:
st.write(f":blue[Hits] (fS2 > 0)")
# The hits grid is drawn only when there is at least one hit row.
if len(temp4Df.index)>0:
data = draw_grid(
temp4Df, formatter=formatter, fit_columns=True, selection='none')
# Non-hits for the selected protein: mdfClass below 1, lowest l2fc first.
temp4Df = gen1Df[gen1Df["accession"]==myPid]
temp4Df = temp4Df[temp4Df["mdfClass"] < 1]
temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
sidebarList2 = temp4Df["fragId"]
with col11:
st.write(f":orange[not] Hits (fS2 = 0)")
# Unlike the hits grid, this one is drawn without an emptiness check.
data = draw_grid(
temp4Df, formatter=formatter, fit_columns=True, selection='none')
# NOTE(review): temp4Df is rebuilt and re-sorted here, but the only later
# reference to it in the visible file is the commented-out select box; the
# first sort is superseded by the second.
temp4Df = gen1Df[gen1Df["accession"]==myPid]
temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
temp4Df = temp4Df.sort_values(by=['mdfClass', 'l2fc'], ascending=[False, True])
# Select-box options: hit fragments first, then non-hit fragments.
sideBarList = pd.concat([sidebarList1, sidebarList2], sort=False)
############ Elaborates Side Bar Selection ##########################
# gen2Id = st.sidebar.selectbox(label = "Select Gen2 Fragment", options = temp4Df["fragId"])
# NOTE(review): if the selected protein has no Gen2 rows for this fragment the
# option list is empty and gen2Id is presumably None, which would break the
# image path concatenation on the next line — confirm.
gen2Id = st.sidebar.selectbox(label = "Select Gen2 Fragment", options = sideBarList)
st.sidebar.image(os.path.join(ROOT, "./assets/fragFiguresSingle/") + gen2Id + ".png")
############ Elaborates Fragment Centric View ##########################
st.subheader(f"Proteins competed by **:blue[{gen2Id}]**")
# All rows (any protein) for the chosen Gen2 fragment within the selected Gen1 fragment's data.
gen2Df = gen1Df[gen1Df["fragId"]==gen2Id]
# Reference hit set for Gen2 uses the fixed labels '< 0.05', '< 0.25' and 'Low'.
tempDf3 = applyFilters(gen2Df, '< 0.05', '< 0.25', 'Low')
# Order: ascending ligHits, ties broken by ascending l2fc (lowest l2fc first).
tempDf3 = tempDf3.sort_values(by=['ligHits', 'l2fc'], ascending=[True, True])
# The name varText3 is reused here for the rank phrase; it replaces the
# earlier value, which has already been written out by this point.
[bestProt2, numProtHits2, protRank2, varText3, varText4] = getVarText(tempDf3)
st.write(f"**:blue[{len(gen2Df.index)}]** proteins were reduced in **:blue[{gen2Id}] competition** experiment (Fc compared to **{fragId}** control). **:blue[{numProtHits2}]** of those proteins are labelled as **hits** by applying **low** filter Set **(:blue[fS2])**. **:blue[{bestProt2}]** {varText4} **hit**. **:blue[{selectedGeneName}]** {varText3} **:blue[{protRank2}]**.")
# Second set of filter widgets; the column names from the Gen1 section are rebound.
col1, col2, col3, colX, colY, colZ = st.columns(6)
with col1:
# Explicit key: presumably needed because an identically labelled select box exists in the Gen1 section — confirm.
pFilterFP = st.selectbox(label = "*P* Value", help = "Select threshold for signifiance", options = ('< 0.05', 'no filter', '< 0.01'), key = 'pFilterFP')
with col2:
pAdjFilterFP = st.selectbox(label = "adjusted *P* Value", help = "Select threshold for signifiance", options = ('< 0.25', 'no filter', '< 0.1', '< 0.05'), key = 'pAdjFilterFP')
with col3:
help_input2='''
**:blue[0]**. no filter\n
**:blue[1]**. Low Confidence ('**:blue[hits]**'): Fc < -1, p < 0.05, adj.p < 0.25, Rank < 500\n
**:blue[2]**. Medium confidence (also '**:blue[hits]**'): Fc < -1.65, p < 0.05, adj.p < 0.25, Rank < 500\n
**:blue[3]**. High Confidence (also '**:blue[hits]**'): Fc < -2.3, p < 0.01, adj.p < 0.1, Rank < 500'''
mdfClassFP = st.selectbox(label = "Gen2 Fragment filter Set (**:blue[fS2]**)", help = help_input2, options = ('Low (hits)', 'Medium (hits)', 'no filter', 'High (hits)'), key = 'mdfClassFP')
# Displayed table: unfiltered when there are no reference hits, otherwise the Gen2 widget selections apply.
if len(tempDf3.index) == 0:
st.write("**:red[No]** data to display with selected filters. Applied **:blue[no filter]**")
gen2Df2 = applyFilters(gen2Df, 'no filter', 'no filter', 'no filter')
else:
gen2Df2 = applyFilters(gen2Df, pFilterFP, pAdjFilterFP, mdfClassFP)
gen2Df2 = gen2Df2.sort_values(by=['ligHits', 'l2fc'], ascending=[True, True])
# Left: protein table. Right: volcano plot image for the Gen2 fragment.
col8, col9 = st.columns(2)
with col8:
# Maps column name -> (header text, column options) for draw_grid.
formatter = {
'accession': ('Protein', {**PINLEFT, 'width': 15}),
'geneName': ('Gene', {**PINLEFT, 'width': 15}),
'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 10}),
'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 20}),
'ligHits': ('# Gen2 Fragment Hits', {'width': 15}),
'mdfClass': ('fS2', {'width': 10})
}
data = draw_grid(
gen2Df2, formatter=formatter, fit_columns=True, selection='none', max_height=340)
with col9:
st.image(os.path.join(ROOT, "./assets/gen2VolcanoPlots/") + gen2Id + ".png")