File size: 4,829 Bytes
007bea3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import streamlit as st
import pandas as pd
import numpy as np
import re
import json
import requests
from io import StringIO
from perplexity import Perplexity
import codecs



# Page-level Streamlit configuration: request the "wide" layout for this app.
# NOTE(review): Streamlit expects this to run before other st.* calls -- keep
# it above the form and any st.write() output.
st.set_page_config(layout="wide")
def dataread(kk, pasteduse, perplex_use):
    """Annotate pasted variants with OncoKB and, optionally, Perplexity.

    The pasted text is parsed as a tab-separated table with a header row. It
    must contain the columns ``Gene_name`` and ``Protein Change``; a leading
    ``p.`` in the protein change is removed. The parsed table is echoed to the
    Streamlit page. Rows whose gene or protein change is missing are skipped.

    Args:
        kk: Tumour type string, sent to OncoKB as ``tumorType`` and embedded
            in the Perplexity question.
        pasteduse: Tab-separated text pasted by the user.
        perplex_use: When true, also ask Perplexity one question per row.

    Returns:
        A 4-tuple of lists ``(all_onc, all_perplex, all_query,
        querygeneprotein)``:

        * ``all_onc`` -- raw OncoKB response bodies, one per annotated row.
        * ``all_perplex`` -- the objects returned by ``Perplexity().search``;
          empty when ``perplex_use`` is false.
        * ``all_query`` -- the Perplexity questions; empty when
          ``perplex_use`` is false.
        * ``querygeneprotein`` -- ``"<gene>&alteration=<change>"`` labels,
          parallel to ``all_onc``.

        Four empty lists are returned when ``pasteduse`` is empty or blank.

    Raises:
        KeyError: If a required column is missing from the pasted table.
        requests.RequestException: If the OncoKB request fails or times out.
    """
    import os  # local import: only used for the optional token override

    # Nothing to parse: previously this path fell through to an unbound
    # ``data`` variable and raised UnboundLocalError.
    if not pasteduse or not pasteduse.strip():
        return [], [], [], []

    data = pd.read_csv(StringIO(pasteduse), sep='\t', header=0)
    data['Gene'] = data['Gene_name']
    data['Protein Change'] = data['Protein Change'].apply(
        lambda x: str(x).replace('p.', ''))

    st.write('The data you input is the following:')
    st.write(data)

    # SECURITY NOTE(review): a bearer token is committed in source. It is kept
    # as the fallback so existing deployments keep working, but it should be
    # rotated and supplied through ONCOKB_API_TOKEN (or a secrets store).
    token = os.environ.get('ONCOKB_API_TOKEN',
                           '64f4aa64-2509-4500-994b-1f2a38422d44')
    headers = {'Accept': 'application/json',
               'Authorization': 'Bearer ' + token}
    oncokb_url = 'https://www.oncokb.org/api/v1/annotate/mutations/byProteinChange'
    request_timeout = 30  # seconds; without it a stalled connection hangs the app

    all_onc = []
    all_perplex = []
    all_query = []
    querygeneprotein = []
    for _, row in data.iterrows():
        # str() both normalises NaN to 'nan' and guards the concatenations
        # below against non-string cells.
        gene = str(row['Gene'])
        protein_change = str(row['Protein Change'])
        if gene == 'nan' or protein_change == 'nan':
            continue

        # Passing ``params`` lets requests URL-encode the values instead of
        # splicing raw user text into the query string.
        response = requests.get(
            oncokb_url,
            params={'hugoSymbol': gene,
                    'alteration': protein_change,
                    'tumorType': kk},
            headers=headers,
            timeout=request_timeout,
        )
        if perplex_use:
            query = ("what drugs are used to treat " + gene + " "
                     + protein_change + " in " + kk + "?")
            # NOTE(review): a new client is created per row and never closed.
            # The search result is iterated later by the caller, so closing
            # the client here may be unsafe -- confirm with the library docs.
            perplexity = Perplexity()
            answer = perplexity.search(query)
            all_perplex.append(answer)
            all_query.append(query)
        all_onc.append(response.content)
        querygeneprotein.append(gene + "&alteration=" + protein_change)

    return all_onc, all_perplex, all_query, querygeneprotein


# Input form. Widget values are only delivered to the script when the
# "Submit" button is pressed.
with st.form(key="parameters"):
    # Passed to dataread() as its first argument (used there as the OncoKB
    # tumour type and inside the Perplexity question).
    texttomatch = st.text_input(label="text to match", value="")
    # Tab-separated variant table that dataread() parses.
    pasteduse = st.text_area(label="paste text to search", value="")
    # Whether Perplexity is queried in addition to OncoKB.
    perplex_use = st.checkbox(label="Use Perplexity", value=False)
    # When unchecked, the full Perplexity result is shown as well.
    abbrev_perplex = st.checkbox(label="Abbreviate Perplexity", value=True)
    submit_button = st.form_submit_button(label="Submit")
# Submit handler: annotate every pasted variant and render one tab per
# gene/alteration, ordered by the string form of its highest FDA level.
if submit_button:

    if pasteduse != '':
        # dataread() returns parallel lists. The two Perplexity lists are
        # empty when Perplexity is disabled, so they must not be indexed
        # unconditionally (that raised IndexError with the default settings).
        oncokb_raw, perplexity_results, perplexity_queries, querygeneprotein = dataread(
            texttomatch, pasteduse, perplex_use)

        dictionary_of_json_fda = {}
        dictionary_of_json_text = {}
        dictionary_of_json_text_perplexity = {}
        dictionary_of_json_query = {}
        for idx, (key, raw) in enumerate(zip(querygeneprotein, oncokb_raw)):
            # Parse each response once; json.loads accepts bytes or str.
            try:
                parsed = json.loads(raw)
            except ValueError:
                parsed = None
            if not isinstance(parsed, dict):
                # Keep the tab and show what came back instead of crashing
                # the whole page on an error body that is not a JSON object.
                raw_text = (raw.decode('utf-8', 'replace')
                            if isinstance(raw, bytes) else str(raw))
                parsed = {'error': 'OncoKB response was not a JSON object',
                          'raw': raw_text}
            dictionary_of_json_fda[key] = parsed.get('highestFdaLevel')
            dictionary_of_json_text[key] = parsed
            if perplex_use and idx < len(perplexity_results):
                dictionary_of_json_text_perplexity[key] = perplexity_results[idx]
                dictionary_of_json_query[key] = perplexity_queries[idx]

        # Sort on the string form so missing (None) levels compare cleanly.
        ord_dict = sorted(dictionary_of_json_fda.items(),
                          key=lambda item: str(item[1]))

        if not ord_dict:
            # st.tabs() needs at least one label.
            st.warning('No rows with both a gene and a protein change were found.')
        else:
            tabs = st.tabs([str(key) + ' ' + str(level) for key, level in ord_dict])
            for tab, (key, _level) in zip(tabs, ord_dict):
                with tab:
                    result = dictionary_of_json_text[key]
                    # Reset per tab so nothing leaks in from the previous one.
                    drugnames = set()
                    last_answer = None

                    st.write('The query was ' + key)
                    st.write('The results were:')
                    if result.get('highestFdaLevel') != '':
                        st.write('Drugs in this result')
                        for treatment in result.get('treatments') or []:
                            for drug in treatment.get('drugs') or []:
                                name = drug.get('drugName')
                                if name is not None:
                                    drugnames.add(name)
                        # Sorted for a stable display order across reruns.
                        for name in sorted(drugnames):
                            st.write(name)
                        st.write(str(len(drugnames)) + ' drugs in this result')

                    if perplex_use and key in dictionary_of_json_text_perplexity:
                        st.write('The perplexity query was ' + dictionary_of_json_query[key])

                        # Only the last item produced by the search is used.
                        for item in dictionary_of_json_text_perplexity[key]:
                            last_answer = item

                        if last_answer is None:
                            st.write('Perplexity returned no answer for this query')
                        else:
                            answer_text = last_answer["answer"]
                            st.write('answer from perplexity')
                            st.write(answer_text)
                            for name in sorted(drugnames):
                                # Plain substring test: a drug name is text,
                                # not a regular expression.
                                if name in answer_text:
                                    st.write('YES BOTH '+name+' is in the answer of perplexity')
                                else:
                                    st.write('NO JUST ONCOKB '+name+' is not in the answer of perplexity')

                    st.write('all oncokb results')
                    st.json(result)
                    if perplex_use and not abbrev_perplex and last_answer is not None:
                        st.write('All results from perplexity:')
                        st.write(last_answer)