# try_perplexity_api / app_initial.py
# Repository page header (Eitan177): commit 460a5b6, "Rename app.py to app_initial.py", 4.83 kB
import streamlit as st
import pandas as pd
import numpy as np
import re
import json
import requests
from io import StringIO
from perplexity import Perplexity
import codecs
# Configure the Streamlit page with the "wide" layout before any other UI element is created.
st.set_page_config(layout="wide")
def dataread(kk, pasteduse, perplex_use):
    """Annotate pasted variants with OncoKB and, optionally, ask Perplexity about them.

    Parameters
    ----------
    kk : str
        Tumor type. Sent to OncoKB as ``tumorType`` and inserted into the
        Perplexity question.
    pasteduse : str
        Tab-separated text with a header row. It must contain the columns
        ``Gene_name`` and ``Protein Change``.
    perplex_use : bool
        When true, a Perplexity search is issued for every annotated row.

    Returns
    -------
    tuple[list, list, list, list]
        ``(all_onc, all_perplex, all_query, querygeneprotein)``. The four lists
        are index-aligned, one entry per row that has both a gene and a protein
        change:

        * ``all_onc`` - raw OncoKB response bodies (``bytes``).
        * ``all_perplex`` - whatever ``Perplexity.search`` returned, or ``None``
          when ``perplex_use`` is false.
        * ``all_query`` - the question sent to Perplexity, or ``None`` when
          ``perplex_use`` is false.
        * ``querygeneprotein`` - ``"<gene>&alteration=<protein change>"`` labels.

        Four empty lists are returned when ``pasteduse`` is empty or blank.
    """
    import os  # local import: only this function needs it

    all_onc = []
    all_perplex = []
    all_query = []
    querygeneprotein = []

    # Nothing pasted: return empty results instead of failing inside pandas.
    if not pasteduse or not pasteduse.strip():
        return all_onc, all_perplex, all_query, querygeneprotein

    # SECURITY NOTE(review): this bearer token is committed to source control.
    # It should be rotated and supplied through the ONCOKB_API_TOKEN environment
    # variable; the literal is kept only as a fallback so existing deployments
    # keep working.
    api_token = os.environ.get(
        "ONCOKB_API_TOKEN", "64f4aa64-2509-4500-994b-1f2a38422d44"
    )
    oncokb_url = "https://www.oncokb.org/api/v1/annotate/mutations/byProteinChange"
    request_headers = {
        'Accept': 'application/json',
        "Authorization": 'Bearer ' + api_token,
    }

    data = pd.read_csv(StringIO(pasteduse), sep='\t', header=0)
    data['Gene'] = data['Gene_name']
    data['Protein Change'] = data['Protein Change'].apply(
        lambda x: str(x).replace('p.', '')
    )
    st.write('The data you input is the following:')
    st.write(data)

    for _, row in data.iterrows():
        gene = str(row['Gene'])
        protein_change = str(row['Protein Change'])
        # Missing cells are read as NaN, whose string form is 'nan'; skip them.
        if gene == 'nan' or protein_change == 'nan':
            continue

        # Let requests build the query string so special characters in the
        # alteration or tumor type (e.g. '*', spaces) are URL-encoded, and set a
        # timeout so a stalled connection cannot hang the app indefinitely.
        response = requests.get(
            oncokb_url,
            params={
                'hugoSymbol': gene,
                'alteration': protein_change,
                'tumorType': kk,
            },
            headers=request_headers,
            timeout=30,
        )

        if perplex_use:
            query = "what drugs are used to treat " + gene + " " + protein_change + " in " + kk + "?"
            # A fresh client per row, left open, mirrors the original behavior.
            # NOTE(review): presumably search() streams lazily and needs the
            # connection alive until the caller consumes it - confirm before
            # sharing or closing the client.
            perplexity = Perplexity()
            answer = perplexity.search(query)
        else:
            query = None
            answer = None

        # Always append to every list so that index i refers to the same row in
        # all four results, whether or not Perplexity was used.
        all_perplex.append(answer)
        all_query.append(query)
        all_onc.append(response.content)
        querygeneprotein.append(gene + "&alteration=" + protein_change)

    return all_onc, all_perplex, all_query, querygeneprotein
def _drug_names(annotation):
    """Return the sorted, de-duplicated drug names found in an OncoKB annotation."""
    names = set()
    # 'treatments' / 'drugs' may be absent or null (e.g. error payloads).
    for treatment in annotation.get('treatments') or []:
        for drug in treatment.get('drugs') or []:
            drug_name = drug.get('drugName')
            if drug_name:
                names.add(drug_name)
    return sorted(names)


# Input form: tumor type, pasted tab-separated variants and Perplexity options.
with st.form(key='parameters'):
    texttomatch = st.text_input('text to match', value='')
    pasteduse = st.text_area('paste text to search', value='')
    perplex_use = st.checkbox('Use Perplexity', value=False)
    abbrev_perplex = st.checkbox('Abbreviate Perplexity', value=True)
    submit_button = st.form_submit_button(label='Submit')

if submit_button and pasteduse != '':
    onc_responses, perplex_answers, perplex_queries, query_keys = dataread(
        texttomatch, pasteduse, perplex_use
    )

    annotations = {}      # query key -> parsed OncoKB JSON
    answers_by_key = {}   # query key -> Perplexity search result (or None)
    queries_by_key = {}   # query key -> Perplexity question (or None)

    for idx, (raw_body, key) in enumerate(zip(onc_responses, query_keys)):
        # Parse each response once; json.loads accepts the raw bytes directly.
        try:
            annotation = json.loads(raw_body)
        except ValueError:
            annotation = None
        if not isinstance(annotation, dict):
            st.error('OncoKB did not return a JSON object for ' + key)
            continue
        annotations[key] = annotation
        # The Perplexity lists can be shorter than the OncoKB list when
        # Perplexity is disabled, so guard the index instead of assuming
        # they are aligned.
        answers_by_key[key] = perplex_answers[idx] if idx < len(perplex_answers) else None
        queries_by_key[key] = perplex_queries[idx] if idx < len(perplex_queries) else None

    # Order the tabs by the string form of the highest FDA level.
    ordered = sorted(
        annotations.items(),
        key=lambda item: str(item[1].get('highestFdaLevel')),
    )

    if not ordered:
        # st.tabs cannot be built from an empty label list.
        st.warning('No variants could be annotated from the pasted text.')
    else:
        tab_labels = [
            str(key) + ' ' + str(annotation.get('highestFdaLevel'))
            for key, annotation in ordered
        ]
        for tab, (key, annotation) in zip(st.tabs(tab_labels), ordered):
            with tab:
                st.write('The query was ' + key)
                st.write('The results were:')

                st.write('Drugs in this result')
                drugnames = _drug_names(annotation)
                for drug_name in drugnames:
                    st.write(drug_name)
                st.write(str(len(drugnames)) + ' drugs in this result')

                final_answer = None
                answer_stream = answers_by_key.get(key)
                if perplex_use and answer_stream is not None:
                    st.write('The perplexity query was ' + str(queries_by_key.get(key)))
                    # Keep only the last item produced by the search result.
                    for final_answer in answer_stream:
                        pass
                    if final_answer is None:
                        st.warning('Perplexity returned no answer for this query.')
                    else:
                        st.write('answer from perplexity')
                        st.write(final_answer["answer"])
                        # Plain, case-insensitive substring match: drug names are
                        # data, not regular expressions, and capitalization in
                        # free text varies.
                        answer_text = str(final_answer["answer"]).lower()
                        for drug_name in drugnames:
                            if drug_name.lower() in answer_text:
                                st.write('YES BOTH ' + drug_name + ' is in the answer of perplexity')
                            else:
                                st.write('NO JUST ONCOKB ' + drug_name + ' is not in the answer of perplexity')

                st.write('all oncokb results')
                st.json(annotation)

                if perplex_use and not abbrev_perplex and final_answer is not None:
                    st.write('All results from perplexity:')
                    st.write(final_answer)