# streamlit_app.py
"""Streamlit page for browsing fragment-protein interactions.

The script runs top to bottom on every Streamlit rerun:

1. load the protein list and the chunked screening / fingerprinting tables,
2. show the Gen1 fragments recorded for the protein picked in the sidebar,
3. show the proteins recorded for the Gen1 fragment picked in the sidebar,
4. for Gen1 fragments listed in ``gen2List``, show the Gen2 competition data.
"""
from io import BytesIO
import os
import sys

import numpy as np
import pandas as pd
import streamlit as st

pd.options.mode.chained_assignment = None  # default='warn'

# relative imports
ROOT = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(ROOT, "./src/"))
from agstyler import PINLEFT, PRECISION_TWO, draw_grid  # noqa: E402

# Number of Gen1 fragments screened; used in the summary text and hit ratio.
NUM_GEN1_SCREENED = 407
# Size of the "top N" lists used for the medal banner.
TOP_N = 5
# Shown whenever a table falls back to the unfiltered data.
NO_DATA_MSG = "**:red[No]** data to display with selected filters. Applied **:blue[no filter]**"

st.set_page_config(
    page_title="Ligand Discovery 1: Fragment-Protein interactions in Chemical Proteomics Screening",
    page_icon=":home:",
    layout="wide",  # "centered",
    initial_sidebar_state="expanded"
)

st.markdown(""" """, unsafe_allow_html=True)

hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

pIdDf = pd.read_csv(os.path.join(ROOT, "./data/general/proteinNames4.tsv"), sep="\t")
pId = pIdDf['UniProtID'].values
pIdDes = pIdDf['Description'].values


def applyFilters(df, pFil, pAdjFil, hitFil):
    """Return the rows of ``df`` that pass the three selected filters.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``ml10p``, ``ml10adjP`` and ``mdfClass`` columns.
    pFil : str
        ``'no filter'``, ``'< 0.05'``, or anything else (treated as ``'< 0.01'``).
    pAdjFil : str
        ``'no filter'``, ``'< 0.05'``, ``'< 0.1'``, or anything else
        (treated as ``'< 0.25'``).
    hitFil : str
        ``'no filter'``, ``'Low'`` / ``'Low (hits)'`` (class >= 1),
        ``'Medium (hits)'`` (class >= 2), or anything else (class == 3).

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when every filter is ``'no filter'``, otherwise a
        filtered selection of it.

    Notes
    -----
    The numeric cut-offs equal -log10 of the labelled threshold
    (1.30103 ~ 0.05, 2 ~ 0.01, 1 ~ 0.1, 0.60206 ~ 0.25); presumably the
    ``ml10*`` columns store -log10 p-values -- confirm against the data.
    """
    if pFil != 'no filter':
        pCutoff = 1.30103 if pFil == '< 0.05' else 2
        df = df[df['ml10p'] > pCutoff]

    if pAdjFil != 'no filter':
        # Unrecognised labels fall through to the '< 0.25' cut-off.
        adjCutoff = {'< 0.05': 1.30103, '< 0.1': 1}.get(pAdjFil, 0.60206)
        df = df[df['ml10adjP'] > adjCutoff]

    if hitFil != 'no filter':
        if hitFil in ('Low', 'Low (hits)'):
            df = df[df['mdfClass'] >= 1]
        elif hitFil == 'Medium (hits)':
            df = df[df['mdfClass'] >= 2]
        else:
            # Unrecognised labels are treated as the strictest class.
            df = df[df['mdfClass'] == 3]
    return df


def getVarText(df, pid=None):
    """Build the text fragments used in the hit summary sentences.

    Parameters
    ----------
    df : pandas.DataFrame
        Hit table, already sorted best-first, with ``geneName`` and
        ``accession`` columns. It is not modified.
    pid : optional
        Accession to rank within ``df``. Defaults to the module-level
        ``myPid`` (the protein selected in the sidebar).

    Returns
    -------
    list
        ``[bestProt, numProtHits, protRank, varText1, varText2]`` where
        ``protRank`` is the 1-based row position of ``pid`` in ``df`` as an
        ``int``, or ``""`` when ``pid`` is absent or ``df`` is empty.
    """
    if pid is None:
        pid = myPid

    numProtHitss = len(df.index)
    if numProtHitss == 0:
        return ["No ", 0, "", "is not a hit", ""]

    bestProt = df["geneName"].values[0]
    # Positional lookup: avoids overwriting the caller's index just to get a rank.
    positions = np.flatnonzero(df["accession"].values == pid)
    if len(positions) > 0:
        return [bestProt, numProtHitss, int(positions[0]) + 1, "hit rank is", "is best"]
    return [bestProt, numProtHitss, "", "is not a hit", "protein is best"]


def loadChunkedTsv(prefix, suffixes):
    """Load a table stored as ``<prefix>00`` (with header) plus headerless chunks.

    ``prefix`` is a path relative to ``ROOT``; each entry of ``suffixes`` names
    one extra chunk that reuses the column names of the ``00`` file. All
    chunks are concatenated once, with a fresh 0..n-1 index.
    """
    head = pd.read_csv(os.path.join(ROOT, prefix + "00"), sep="\t")
    chunks = [head]
    for suffix in suffixes:
        chunk = pd.read_csv(os.path.join(ROOT, prefix + suffix), header=None, sep="\t")
        chunk.columns = head.columns
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)


def assetPath(folder, name):
    """Return the path of ``<ROOT>/assets/<folder>/<name>.png``."""
    return os.path.join(ROOT, "./assets/" + folder + "/") + name + ".png"


st.sidebar.title("Ligand Discovery 1: Fragment-Protein Interactions")
st.title("Chemical Proteomics Screening")

help_input3 = (
    " use **:blue[UniProt Accession]**, Short Gene Name(s) or Protein Description to search\n"
    " **Tip**:\n"
    " To change selected protein, **:red[NO]** need to select whole existing term, delete and type new.\n"
    " :blue[Just start to type new protein, old text will be automatically cleared]"
)
pIdIndex = st.sidebar.selectbox(
    label="Select Protein",
    help=help_input3,
    options=range(len(pIdDes)),
    format_func=lambda x: pIdDes[x],
    index=1706)
myPid = pId[pIdIndex]

finalScreenSuffixes = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13"]
intDfOri = loadChunkedTsv("./data/general/finalScreenTSV", finalScreenSuffixes)

finalFpSuffixes = ["01", "02"]
fpDf = loadChunkedTsv("./data/general/finalFpTSV", finalFpSuffixes)

intDf = intDfOri[intDfOri["accession"] == myPid]

if len(intDf) == 0:
    st.sidebar.write("We did **:red[not]** detect selected protein interacting with any fragment in our screen, try another protein")
    # Every view below needs rows for the selected protein, so end this run here.
    st.stop()

selectedGeneName = intDf["geneName"].values[0]

# Default "medium" filter set: drives the summary sentence and the top-N list.
tempDf = applyFilters(intDf, '< 0.05', '< 0.25', 'Medium (hits)')
if len(tempDf.index) > 0:
    tempDf = tempDf.sort_values(by=['protHits', 'l2fc'], ascending=[True, False])
    bestFrag = tempDf["fragId"].values[0]
    top5Frags = tempDf["fragId"].values[0:TOP_N]
    numLigaHits = len(tempDf.index)
    varText3 = "is best"
else:
    bestFrag = "No"
    top5Frags = []
    varText3 = ""
    numLigaHits = 0
proteinHasDefaultHits = len(tempDf.index) > 0
del tempDf

######## Screening Protein Centric View ############

st.write("**Selected Protein**: ", pIdDes[pIdIndex])
st.markdown("""---""")
st.subheader(f"First generation fragments (Gen1) that enrich **:blue[{selectedGeneName}]** over background")
numInt = len(intDf.index)
st.write(
    f"**:blue[{numInt}]** (out of {NUM_GEN1_SCREENED} screened) Gen1 fragments enrich **{selectedGeneName}**. "
    f"**:blue[{numLigaHits}]**/{numInt} fragments are labelled as **hits** by applying **medium** filter Set **(:blue[fS])**. "
    f"**:blue[{bestFrag}]** {varText3} **hit**.")

# NOTE(review): the text says "hit/enriched ratio" but the denominator is the
# number of screened fragments, not numInt -- kept as in the original; confirm.
if (numLigaHits / NUM_GEN1_SCREENED) > 0.1:
    hitRatio = np.round((numLigaHits / NUM_GEN1_SCREENED) * 100, 1)
    st.write(f"**:blue[{selectedGeneName}]** is a **:red[promiscuous]** protein (**hit**/enriched ratio is **:red[{hitRatio}]**%).")

col1, col2, col3, colX, colY, colZ = st.columns(6)
with col1:
    pFilter = st.selectbox(
        label="*P* Value",
        help="Select threshold for signifiance",
        options=('< 0.05', 'no filter', '< 0.01'))
with col2:
    pAdjFilter = st.selectbox(
        label="adjusted *P* Value",
        help="Select threshold for signifiance",
        options=('< 0.25', '< 0.1', 'no filter', '< 0.05'))
with col3:
    help_input = (
        " **:blue[0]**. no filter\n"
        " **:blue[1]**. Low Confidence: Fc > 1, Median > 1, p < 0.05, adj.p < 0.25, Rank < 500\n"
        " **:blue[2]**. Medium confidence ('**:blue[hits]**'): Fc > 2.3, Median > 1, p < 0.05, adj.p < 0.25, Rank < 500\n"
        " **:blue[3]**. High Confidence (also '**:blue[hits]**'): Fc > 2.3, Median > 2.3, p < 0.01, adj.p < 0.1, Rank < 500"
    )
    mdfClass = st.selectbox(
        label="filter Set (**:blue[fS])**" if False else "filter Set (**:blue[fS]**)",
        help=help_input,
        options=('Medium (hits)', 'no filter', 'Low', 'High (hits)'))

# Fall back to the unfiltered table when the protein has no default hits OR
# when the selected filters leave nothing. An empty table would make the
# fragment selectbox below return None and crash the image/path code.
filteredIntDf = applyFilters(intDf, pFilter, pAdjFilter, mdfClass)
if not proteinHasDefaultHits or len(filteredIntDf.index) == 0:
    st.write(NO_DATA_MSG)
else:
    intDf = filteredIntDf
del filteredIntDf

intDf = intDf.sort_values(by=['protHits', 'l2fc'], ascending=[True, False])

col4, col5 = st.columns(2)
with col4:
    formatter = {
        'fragId': ('Fragment', {**PINLEFT, 'width': 10}),
        'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 15}),
        'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 25}),
        'protHits': ('# Protein Hits', {'width': 15}),
        'mdfClass': ('fS', {'width': 10})
    }
    draw_grid(intDf, formatter=formatter, fit_columns=True, selection='none', max_height=340)
with col5:
    st.image(assetPath("proteinCentric", myPid))

fragId = st.sidebar.selectbox(label="Select Gen1 Fragment", options=intDf["fragId"])
intDf2 = intDfOri[intDfOri["fragId"] == fragId]

############ Screening Fragment Centric ###############################

st.subheader(f"Proteins enriched by **:blue[{fragId}]**")
numProtDetected = len(intDf2.index)
tempDf2 = applyFilters(intDf2, '< 0.05', '< 0.25', 'Medium (hits)')
tempDf2 = tempDf2.sort_values(by=['ligHits', 'l2fc'], ascending=[True, False])
[bestProt, numProtHits, protRank, varText, varText2] = getVarText(tempDf2)
fragmentHasDefaultHits = len(tempDf2.index) > 0
del tempDf2

if fragmentHasDefaultHits:
    intDf3 = applyFilters(intDf2, pFilter, pAdjFilter, mdfClass)
else:
    intDf3 = intDf2
intDf3 = intDf3.sort_values(by=['ligHits', 'l2fc'], ascending=[True, False])

st.sidebar.image(assetPath("fragFiguresSingle", fragId))
st.write(
    f"**:blue[{numProtDetected}]** proteins were enriched by fragment **{fragId}** (Fc compared to **CRF** control). "
    f"**:blue[{numProtHits}]** of those proteins were labelled as **hits** by applying **medium** filter Set **(:blue[fS])**. "
    f"**:blue[{bestProt}]** {varText2} **hit**. "
    f"**:blue[{selectedGeneName}]** {varText} **:blue[{protRank}]**.")

if (numProtHits / numProtDetected) > 0.05:
    fragHitRatio = np.round((numProtHits / numProtDetected) * 100, 1)
    st.write(f"**:blue[{fragId}]** is **:red[promiscuous]** fragment (**hit**/enriched ratio is **:red[{fragHitRatio}]**%).")

col6, col7 = st.columns(2)
with col6:
    st.image(assetPath("ligandVolcanoPlots", fragId))
with col7:
    if not fragmentHasDefaultHits:
        st.write(NO_DATA_MSG)
    formatter = {
        'accession': ('Protein', {**PINLEFT, 'width': 15}),
        'geneName': ('Gene', {**PINLEFT, 'width': 15}),
        'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 15}),
        'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 25}),
        'ligHits': ('# Fragment Hits', {'width': 15}),
        'mdfClass': ('fS', {'width': 10})
    }
    draw_grid(intDf3, formatter=formatter, fit_columns=True, selection='none', max_height=340)

# protRank is "" when the protein is not a hit for this fragment, else a
# 1-based rank. The banner makes two claims, so both are checked: the protein
# is within the fragment's top N (rank <= N, not < N) and the fragment is
# within the protein's top N.
if not isinstance(protRank, str):
    if protRank <= TOP_N and fragId in top5Frags:
        st.subheader(f"**:blue[{fragId}-{selectedGeneName}]** interaction: :first_place_medal:")
        st.write(
            f"**:blue[{fragId}]** is in top 5 **Fragment hits** for **{selectedGeneName}**. "
            f"**:blue[{selectedGeneName}]** is in top 5 **Protein hits** for **{fragId}**.")

############# Fingerprinting / Elaborates Data ######################

gen2List = ["C027", "C028", "C044", "C046", "C064", "C115", "C127", "C160", "C179", "C186",
            "C197", "C219", "C240", "C270", "C275", "C303", "C310", "C320", "C378", "C391"]

if fragId in gen2List:
    st.markdown("""---""")

    ############ Elaborates Protein Centric View ##########################

    st.subheader(f"Second generation fragments (Gen2) of **:blue[{fragId}]** that compete **:blue[{selectedGeneName}]**")
    gen1Df = fpDf[fpDf["gen1Lig"] == fragId]
    numGen2Ligs = len(np.unique(gen1Df['fragId']))

    # Rows for the selected protein, split into hits (fS2 >= 1) and non-hits.
    proteinGen2Df = gen1Df[gen1Df["accession"] == myPid]
    temp4Df = proteinGen2Df[proteinGen2Df["mdfClass"] >= 1]
    temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
    sidebarList1 = temp4Df["fragId"]

    st.write(
        f"**:blue[{numGen2Ligs}]** Gen2 fragments were screened in **competition** experiments against Gen1 fragment **{fragId}**. "
        f"**:blue[{len(temp4Df.index)}]**/{numGen2Ligs} Gen2 fragments of **{fragId}** pass **low** filter Set (**:blue[fS2]**).")

    formatter = {
        'fragId': ('Gen2', {**PINLEFT, 'width': 10}),
        'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 10}),
        'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 20}),
        'protHits': ('# Gen2 Protein Hits', {'width': 15}),
        'mdfClass': ('fS2', {'width': 10})
    }

    col10, col11 = st.columns(2)
    with col10:
        st.write(":blue[Hits] (fS2 > 0)")
        if len(temp4Df.index) > 0:
            draw_grid(temp4Df, formatter=formatter, fit_columns=True, selection='none')

    temp4Df = proteinGen2Df[proteinGen2Df["mdfClass"] < 1]
    temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
    sidebarList2 = temp4Df["fragId"]
    with col11:
        st.write(":orange[not] Hits (fS2 = 0)")
        draw_grid(temp4Df, formatter=formatter, fit_columns=True, selection='none')

    # Sidebar order: hits first, then non-hits, each sorted by l2fc.
    sideBarList = pd.concat([sidebarList1, sidebarList2], sort=False)

    ############ Elaborates Side Bar Selection ##########################

    gen2Id = st.sidebar.selectbox(label="Select Gen2 Fragment", options=sideBarList)
    if gen2Id is None:
        # No Gen2 rows for this protein: the selectbox has no options and
        # returns None, which would break the image path and the lookups below.
        st.sidebar.write(f"**:red[No]** Gen2 fragment data for **:blue[{selectedGeneName}]** with **{fragId}**")
        st.stop()
    st.sidebar.image(assetPath("fragFiguresSingle", gen2Id))

    ############ Elaborates Fragment Centric View ##########################

    st.subheader(f"Proteins competed by **:blue[{gen2Id}]**")
    gen2Df = gen1Df[gen1Df["fragId"] == gen2Id]
    tempDf3 = applyFilters(gen2Df, '< 0.05', '< 0.25', 'Low')
    tempDf3 = tempDf3.sort_values(by=['ligHits', 'l2fc'], ascending=[True, True])
    [bestProt2, numProtHits2, protRank2, varText3, varText4] = getVarText(tempDf3)
    st.write(
        f"**:blue[{len(gen2Df.index)}]** proteins were reduced in **:blue[{gen2Id}] competition** experiment (Fc compared to **{fragId}** control). "
        f"**:blue[{numProtHits2}]** of those proteins are labelled as **hits** by applying **low** filter Set **(:blue[fS2])**. "
        f"**:blue[{bestProt2}]** {varText4} **hit**. "
        f"**:blue[{selectedGeneName}]** {varText3} **:blue[{protRank2}]**.")

    col1, col2, col3, colX, colY, colZ = st.columns(6)
    with col1:
        pFilterFP = st.selectbox(
            label="*P* Value",
            help="Select threshold for signifiance",
            options=('< 0.05', 'no filter', '< 0.01'),
            key='pFilterFP')
    with col2:
        pAdjFilterFP = st.selectbox(
            label="adjusted *P* Value",
            help="Select threshold for signifiance",
            options=('< 0.25', 'no filter', '< 0.1', '< 0.05'),
            key='pAdjFilterFP')
    with col3:
        help_input2 = (
            " **:blue[0]**. no filter\n"
            " **:blue[1]**. Low Confidence ('**:blue[hits]**'): Fc < -1, p < 0.05, adj.p < 0.25, Rank < 500\n"
            " **:blue[2]**. Medium confidence (also '**:blue[hits]**'): Fc < -1.65, p < 0.05, adj.p < 0.25, Rank < 500\n"
            " **:blue[3]**. High Confidence (also '**:blue[hits]**'): Fc < -2.3, p < 0.01, adj.p < 0.1, Rank < 500"
        )
        mdfClassFP = st.selectbox(
            label="Gen2 Fragment filter Set (**:blue[fS2]**)",
            help=help_input2,
            options=('Low (hits)', 'Medium (hits)', 'no filter', 'High (hits)'),
            key='mdfClassFP')

    if len(tempDf3.index) == 0:
        st.write(NO_DATA_MSG)
        gen2Df2 = gen2Df
    else:
        gen2Df2 = applyFilters(gen2Df, pFilterFP, pAdjFilterFP, mdfClassFP)
    gen2Df2 = gen2Df2.sort_values(by=['ligHits', 'l2fc'], ascending=[True, True])

    col8, col9 = st.columns(2)
    with col8:
        formatter = {
            'accession': ('Protein', {**PINLEFT, 'width': 15}),
            'geneName': ('Gene', {**PINLEFT, 'width': 15}),
            'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 10}),
            'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 20}),
            'ligHits': ('# Gen2 Fragment Hits', {'width': 15}),
            'mdfClass': ('fS2', {'width': 10})
        }
        draw_grid(gen2Df2, formatter=formatter, fit_columns=True, selection='none', max_height=340)
    with col9:
        st.image(assetPath("gen2VolcanoPlots", gen2Id))