Spaces:

ligdis
/

1

Running

App Files Files Community

1 / app.py

ligdis

Update app.py

201cdf6 verified 6 months ago

raw

history blame contribute delete

15.9 kB

	# streamlit_app.py
	import streamlit as st
	import pandas as pd
	pd.options.mode.chained_assignment = None # default='warn'
	import numpy as np
	from io import BytesIO
	import os
	import sys

	# relative imports
	ROOT = os.path.abspath(os.path.dirname(__file__))
	sys.path.append(os.path.join(ROOT, "./src/"))
	from agstyler import PINLEFT, PRECISION_TWO, draw_grid

	st.set_page_config(
	page_title="Ligand Discovery 1: Fragment-Protein interactions in Chemical Proteomics Screening",
	page_icon=":home:",
	layout="wide", # "centered",
	initial_sidebar_state="expanded"
	)

	st.markdown("""
	<style>
	.css-13sdm1b.e16nr0p33 {
	margin-top: -75px;
	}
	</style>
	""", unsafe_allow_html=True)

	hide_streamlit_style = """
	<style>
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	#header {visibility: hidden;}
	</style>
	"""
	st.markdown(hide_streamlit_style, unsafe_allow_html=True)

	pIdDf = pd.read_csv(os.path.join(ROOT, "./data/general/proteinNames4.tsv"), sep="\t")

	pId = pIdDf['UniProtID'].values
	pIdDes = pIdDf['Description'].values

	def applyFilters(df, pFil, pAdjFil, hitFil):
	if pFil != 'no filter':
	if pFil == '< 0.05':
	df = df[df['ml10p'] > 1.30103]
	else:
	df = df[df['ml10p'] > 2]

	if pAdjFil != 'no filter':
	if pAdjFil == '< 0.05':
	df = df[df['ml10adjP'] > 1.30103]
	elif pAdjFil == '< 0.1':
	df = df[df['ml10adjP'] > 1]
	else:
	df = df[df['ml10adjP'] > 0.60206]

	if hitFil != 'no filter':
	if hitFil == 'Low':
	df = df[df['mdfClass'] >= 1]
	elif hitFil == 'Medium (hits)':
	df = df[df['mdfClass'] >= 2]
	elif hitFil == 'Low (hits)':
	df = df[df['mdfClass'] >= 1]
	else:
	df = df[df['mdfClass'] == 3]

	return df

	def getVarText(df):
	if (len(df.index)) > 0:
	bestProt = df["geneName"].values[0]
	numProtHitss = len(df.index)
	df.index = np.arange(1,len(df)+1)
	protList = df.index[df["accession"]==myPid].tolist()
	if len(protList) > 0:
	protRank = protList[0]
	varText1 = "hit rank is"
	varText2 = "is best"
	else:
	varText1 = "is not a hit"
	protRank = ""
	varText2 = "protein is best"
	del protList
	else:
	bestProt = "No "
	numProtHitss = 0
	varText1 = "is not a hit"
	protRank = ""
	varText2 = ""
	return [bestProt, numProtHitss, protRank, varText1, varText2]


	st.sidebar.title("Ligand Discovery 1: Fragment-Protein Interactions")
	st.title("Chemical Proteomics Screening")

	help_input3='''

	use :blue[UniProt Accession], Short Gene Name(s) or Protein Description to search\n
	Tip:\n
	To change selected protein, :red[NO] need to select whole existing term, delete and type new.\n
	:blue[Just start to type new protein, old text will be automatically cleared]'''

	pIdIndex = st.sidebar.selectbox(label = "Select Protein", help = help_input3, options = range(len(pIdDes)), format_func= lambda x: pIdDes[x], index= 1706)

	myPid = pId[pIdIndex]

	intDfOri = pd.read_csv(os.path.join(ROOT, "./data/general/finalScreenTSV00"), sep="\t")
	column_names = intDfOri.columns
	finalScreenSuffixes = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13"]
	for eachSuffix in finalScreenSuffixes:
	fileAppend = "./data/general/finalScreenTSV" + eachSuffix
	df_temp = pd.read_csv(os.path.join(ROOT, fileAppend), header=None, sep="\t")
	df_temp.columns = intDfOri.columns
	intDfOri = pd.concat([intDfOri, df_temp], ignore_index=True)
	#intDfOri = pd.read_csv(os.path.join(ROOT, "./data/general/finalScreen.tsv"), sep="\t")

	fpDf = pd.read_csv(os.path.join(ROOT, "./data/general/finalFpTSV00"), sep="\t")
	column_names = fpDf.columns
	finalFpSuffixes = ["01", "02"]
	for eachSuffix in finalFpSuffixes:
	fileAppend = "./data/general/finalFpTSV" + eachSuffix
	df_temp = pd.read_csv(os.path.join(ROOT, fileAppend), header=None, sep="\t")
	df_temp.columns = fpDf.columns
	fpDf = pd.concat([fpDf, df_temp], ignore_index=True)
	#fpDf = pd.read_csv(os.path.join(ROOT, "./data/general/finalFp.tsv"), sep="\t")

	intDf = intDfOri[intDfOri["accession"]==myPid]

	if len(intDf) == 0:
	st.sidebar.write("We did :red[not] detect selected protein interacting with any fragment in our screen, try another protein")
	else:
	selectedGeneName = intDf["geneName"].values[0]
	tempDf = applyFilters(intDf, '< 0.05', '< 0.25', 'Medium (hits)')
	if (len(tempDf.index)) > 0:
	tempDf = tempDf.sort_values(by=['protHits', 'l2fc'], ascending=[True, False])
	bestFrag = tempDf["fragId"].values[0]
	top5Frags = tempDf["fragId"].values[0:5]
	numLigaHits = len(tempDf.index) # numLigaHits is already present in base input table
	varText3 = "is best"
	else:
	bestFrag = "No"
	varText3 = ""
	numLigaHits = 0

	######## Screening Protein Centric View ############

	st.write("Selected Protein: ", pIdDes[pIdIndex])
	st.markdown("""---""")

	st.subheader(f"First generation fragments (Gen1) that enrich :blue[{selectedGeneName}] over background")

	numInt = len(intDf.index)
	st.write(f":blue[{numInt}] (out of 407 screened) Gen1 fragments enrich {selectedGeneName}. :blue[{numLigaHits}]/{numInt} fragments are labelled as hits by applying medium filter Set (:blue[fS]). :blue[{bestFrag}] {varText3} hit.")

	if (numLigaHits/407)>0.1:
	hitRatio = np.round((numLigaHits/407)*100, 1)
	st.write(f":blue[{selectedGeneName}] is a :red[promiscuous] protein (hit/enriched ratio is :red[{hitRatio}]%).")

	col1, col2, col3, colX, colY, colZ = st.columns(6)
	with col1:
	pFilter = st.selectbox(label = "P Value", help = "Select threshold for signifiance", options = ('< 0.05', 'no filter', '< 0.01'))
	with col2:
	pAdjFilter = st.selectbox(label = "adjusted P Value", help = "Select threshold for signifiance", options = ('< 0.25', '< 0.1', 'no filter', '< 0.05'))
	with col3:
	help_input='''

	:blue[0]. no filter\n
	:blue[1]. Low Confidence: Fc > 1, Median > 1, p < 0.05, adj.p < 0.25, Rank < 500\n
	:blue[2]. Medium confidence (':blue[hits]'): Fc > 2.3, Median > 1, p < 0.05, adj.p < 0.25, Rank < 500\n
	:blue[3]. High Confidence (also ':blue[hits]'): Fc > 2.3, Median > 2.3, p < 0.01, adj.p < 0.1, Rank < 500'''
	mdfClass = st.selectbox(label = "filter Set (:blue[fS])", help = help_input, options = ('Medium (hits)', 'no filter', 'Low', 'High (hits)'))

	if len(tempDf.index) == 0:
	st.write(":red[No] data to display with selected filters. Applied :blue[no filter]")
	intDf = applyFilters(intDf, 'no filter', 'no filter', 'no filter')

	else:
	intDf = applyFilters(intDf, pFilter, pAdjFilter, mdfClass)

	del tempDf

	intDf = intDf.sort_values(by=['protHits', 'l2fc'], ascending=[True, False])

	col4, col5 = st.columns(2)
	with col4:
	formatter = {
	'fragId': ('Fragment', {**PINLEFT, 'width': 10}),
	'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 15}),
	'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 25}),
	'protHits': ('# Protein Hits', {'width': 15}),
	'mdfClass': ('fS', {'width': 10})
	}
	data = draw_grid(intDf, formatter=formatter, fit_columns=True, selection='none', max_height=340)
	with col5:
	st.image(os.path.join(ROOT, "./assets/proteinCentric/") + myPid + ".png")

	fragId = st.sidebar.selectbox(label = "Select Gen1 Fragment", options = intDf["fragId"])
	intDf2 = intDfOri[intDfOri["fragId"]==fragId]

	############ Screening Fragment Centric ###############################

	st.subheader(f"Proteins enriched by :blue[{fragId}]")

	tempDf2 = intDfOri[intDfOri["fragId"]==fragId]
	numProtDetected = len(tempDf2.index)
	tempDf2 = applyFilters(tempDf2, '< 0.05', '< 0.25', 'Medium (hits)')

	tempDf2 = tempDf2.sort_values(by=['ligHits', 'l2fc'], ascending=[True, False])
	[bestProt, numProtHits, protRank, varText, varText2] = getVarText(tempDf2)

	if len(tempDf2.index) == 0:
	intDf3 = applyFilters(intDf2, 'no filter', 'no filter', 'no filter')

	else:
	intDf3 = applyFilters(intDf2, pFilter, pAdjFilter, mdfClass)

	intDf3 = intDf3.sort_values(by=['ligHits', 'l2fc'], ascending=[True, False])

	st.sidebar.image(os.path.join(ROOT, "./assets/fragFiguresSingle/") + fragId + ".png")

	st.write(f":blue[{numProtDetected}] proteins were enriched by fragment {fragId} (Fc compared to CRF control). :blue[{numProtHits}] of those proteins were labelled as hits by applying medium filter Set (:blue[fS]). :blue[{bestProt}] {varText2} hit. :blue[{selectedGeneName}] {varText} :blue[{protRank}].")

	if (numProtHits/numProtDetected)>0.05:
	fragHitRatio = np.round((numProtHits/numProtDetected)*100, 1)
	st.write(f":blue[{fragId}] is :red[promiscuous] fragment (hit/enriched ratio is :red[{fragHitRatio}]%).")

	col6, col7 = st.columns(2)
	with col6:
	st.image(os.path.join(ROOT, "./assets/ligandVolcanoPlots/") + fragId + ".png")
	with col7:
	if len(tempDf2.index) == 0:
	st.write(":red[No] data to display with selected filters. Applied :blue[no filter]")
	formatter = {
	'accession': ('Protein', {**PINLEFT, 'width': 15}),
	'geneName': ('Gene', {**PINLEFT, 'width': 15}),
	'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 15}),
	'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 25}),
	'ligHits': ('# Fragment Hits', {'width': 15}),
	'mdfClass': ('fS', {'width': 10})
	}
	data = draw_grid(
	intDf3, formatter=formatter, fit_columns=True, selection='none', max_height=340)
	if not isinstance(protRank, str):
	if protRank < 5:
	st.subheader(f":blue[{fragId}-{selectedGeneName}] interaction: :first_place_medal:")
	st.write(f":blue[{fragId}] is in top 5 Fragment hits for {selectedGeneName}. :blue[{selectedGeneName}] is in top 5 Protein hits for {fragId}.")

	del tempDf2
	############# Fingerprinting / Elaborates Data ######################

	gen2List = ["C027", "C028", "C044", "C046", "C064", "C115", "C127", "C160", "C179", "C186", "C197", "C219", "C240", "C270", "C275", "C303", "C310", "C320", "C378", "C391"]
	if fragId in gen2List:
	st.markdown("""---""")
	# st.sidebar.markdown("""---""")

	############ Elaborates Protein Centric View ##########################
	st.subheader(f"Second generation fragments (Gen2) of :blue[{fragId}] that compete :blue[{selectedGeneName}]")

	gen1Df = fpDf[fpDf["gen1Lig"]==fragId]

	numGen2Ligs = len(np.unique(gen1Df['fragId']))

	temp4Df = gen1Df[gen1Df["accession"]==myPid]
	temp4Df = temp4Df[temp4Df["mdfClass"]>=1]
	# temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
	temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)

	sidebarList1 = temp4Df["fragId"]

	if len(temp4Df.index)>0:
	bestGen2Lig = temp4Df['fragId'].values[0]
	varText5 = "is best Gen2 fragment hit."
	else:
	bestGen2Lig = ""
	varText5 = ""

	st.write(f":blue[{numGen2Ligs}] Gen2 fragments were screened in competition experiments against Gen1 fragment {fragId}. :blue[{len(temp4Df.index)}]/{numGen2Ligs} Gen2 fragments of {fragId} pass low filter Set (:blue[fS2]).")
	# :blue[{bestGen2Lig}] {varText5}")
	# compete :blue[{selectedGeneName}] after applying

	formatter = {
	'fragId': ('Gen2', {**PINLEFT, 'width': 10}),
	'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 10}),
	'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 20}),
	'protHits': ('# Gen2 Protein Hits', {'width': 15}),
	'mdfClass': ('fS2', {'width': 10})
	}

	col10, col11 = st.columns(2)
	with col10:
	st.write(f":blue[Hits] (fS2 > 0)")
	if len(temp4Df.index)>0:
	data = draw_grid(
	temp4Df, formatter=formatter, fit_columns=True, selection='none')

	temp4Df = gen1Df[gen1Df["accession"]==myPid]
	temp4Df = temp4Df[temp4Df["mdfClass"] < 1]
	temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)

	sidebarList2 = temp4Df["fragId"]

	with col11:
	st.write(f":orange[not] Hits (fS2 = 0)")
	data = draw_grid(
	temp4Df, formatter=formatter, fit_columns=True, selection='none')

	temp4Df = gen1Df[gen1Df["accession"]==myPid]
	temp4Df = temp4Df.sort_values(by='l2fc', ascending=True)
	temp4Df = temp4Df.sort_values(by=['mdfClass', 'l2fc'], ascending=[False, True])

	sideBarList = pd.concat([sidebarList1, sidebarList2], sort=False)

	############ Elaborates Side Bar Selection ##########################

	# gen2Id = st.sidebar.selectbox(label = "Select Gen2 Fragment", options = temp4Df["fragId"])
	gen2Id = st.sidebar.selectbox(label = "Select Gen2 Fragment", options = sideBarList)
	st.sidebar.image(os.path.join(ROOT, "./assets/fragFiguresSingle/") + gen2Id + ".png")

	############ Elaborates Fragment Centric View ##########################

	st.subheader(f"Proteins competed by :blue[{gen2Id}]")

	gen2Df = gen1Df[gen1Df["fragId"]==gen2Id]

	tempDf3 = applyFilters(gen2Df, '< 0.05', '< 0.25', 'Low')
	tempDf3 = tempDf3.sort_values(by=['ligHits', 'l2fc'], ascending=[True, True])

	[bestProt2, numProtHits2, protRank2, varText3, varText4] = getVarText(tempDf3)

	st.write(f":blue[{len(gen2Df.index)}] proteins were reduced in :blue[{gen2Id}] competition experiment (Fc compared to {fragId} control). :blue[{numProtHits2}] of those proteins are labelled as hits by applying low filter Set (:blue[fS2]). :blue[{bestProt2}] {varText4} hit. :blue[{selectedGeneName}] {varText3} :blue[{protRank2}].")

	col1, col2, col3, colX, colY, colZ = st.columns(6)
	with col1:
	pFilterFP = st.selectbox(label = "P Value", help = "Select threshold for signifiance", options = ('< 0.05', 'no filter', '< 0.01'), key = 'pFilterFP')
	with col2:
	pAdjFilterFP = st.selectbox(label = "adjusted P Value", help = "Select threshold for signifiance", options = ('< 0.25', 'no filter', '< 0.1', '< 0.05'), key = 'pAdjFilterFP')
	with col3:
	help_input2='''

	:blue[0]. no filter\n
	:blue[1]. Low Confidence (':blue[hits]'): Fc < -1, p < 0.05, adj.p < 0.25, Rank < 500\n
	:blue[2]. Medium confidence (also ':blue[hits]'): Fc < -1.65, p < 0.05, adj.p < 0.25, Rank < 500\n
	:blue[3]. High Confidence (also ':blue[hits]'): Fc < -2.3, p < 0.01, adj.p < 0.1, Rank < 500'''
	mdfClassFP = st.selectbox(label = "Gen2 Fragment filter Set (:blue[fS2])", help = help_input2, options = ('Low (hits)', 'Medium (hits)', 'no filter', 'High (hits)'), key = 'mdfClassFP')

	if len(tempDf3.index) == 0:
	st.write(":red[No] data to display with selected filters. Applied :blue[no filter]")
	gen2Df2 = applyFilters(gen2Df, 'no filter', 'no filter', 'no filter')

	else:
	gen2Df2 = applyFilters(gen2Df, pFilterFP, pAdjFilterFP, mdfClassFP)

	gen2Df2 = gen2Df2.sort_values(by=['ligHits', 'l2fc'], ascending=[True, True])

	col8, col9 = st.columns(2)
	with col8:
	formatter = {
	'accession': ('Protein', {**PINLEFT, 'width': 15}),
	'geneName': ('Gene', {**PINLEFT, 'width': 15}),
	'l2fc': ('Fc(log2)', {**PRECISION_TWO, 'width': 10}),
	'l2fcM': ('Fc Median adjusted', {**PRECISION_TWO, 'width': 20}),
	'ligHits': ('# Gen2 Fragment Hits', {'width': 15}),
	'mdfClass': ('fS2', {'width': 10})
	}
	data = draw_grid(
	gen2Df2, formatter=formatter, fit_columns=True, selection='none', max_height=340)
	with col9:
	st.image(os.path.join(ROOT, "./assets/gen2VolcanoPlots/") + gen2Id + ".png")