3
File size: 3,463 Bytes
223c626
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import collections

# Global profiles
# Two entries, each pairing a capitalised name with its lowercase form
# (presumably UI label -> internal key; confirm against callers).
global_profiles_dict = dict(
    Detectability="detectability",
    Promiscuity="promiscuity",
)

# Task suffixes
# Ordered mapping from a task-name suffix to a human-readable description.
# Entry order is: continuous scores, custom-cutoff hits, then the
# "_bin_", "_top_" and "_bottom_" families at 50/100/250/500.
# NOTE(review): the "_bin_N" and "_top_N" suffixes carry identical labels
# ("Top N proteins"); presumably intentional aliases -- confirm.
task_suf = collections.OrderedDict(
    [
        ("_val_log2fc", "Log2FC"),
        ("_gauss", "Gaussianized score"),
        ("_bin_hit", "Protein hits (custom cutoffs)"),
        ("_bin_50", "Top 50 proteins"),
        ("_bin_100", "Top 100 proteins"),
        ("_bin_250", "Top 250 proteins"),
        ("_bin_500", "Top 500 proteins"),
        ("_top_50", "Top 50 proteins"),
        ("_top_100", "Top 100 proteins"),
        ("_top_250", "Top 250 proteins"),
        ("_top_500", "Top 500 proteins"),
        ("_bottom_50", "Bottom 50 proteins"),
        ("_bottom_100", "Bottom 100 proteins"),
        ("_bottom_250", "Bottom 250 proteins"),
        ("_bottom_500", "Bottom 500 proteins"),
    ]
)

# Annotation type dict
# Keys are category names; each value is the list of annotation names filed
# under that category. Both the category order and the order of names within
# a category follow the pairs below.
annotation_type_dict = {
    category: list(names)
    for category, names in (
        (
            "Sequence",
            (
                "InterPro Superfamily",
                "InterPro Family",
                "InterPro Domain",
                "InterPro Active site",
                "InterPro Binding site",
                "InterPro Conserved site",
                "InterPro Repeat",
                "InterPro PTM",
            ),
        ),
        (
            "Functions",
            (
                "Panther Protein class",
                "Protein Atlas Protein class",
                "Protein Atlas Molecular function",
                "GO Molecular function",
            ),
        ),
        (
            "Processes and pathways",
            (
                "Reactome Pathways",
                "KEGG Pathways",
                "WikiPathways",
                "Protein Atlas Biological process",
                "GO Biological process",
            ),
        ),
        (
            "Localization",
            (
                "Protein Atlas Subcellular",
                "GO Cellular component",
                "OpenCell Localization",
            ),
        ),
        (
            "Drugs and Diseases",
            (
                "Pharos IDG Category",
                "Pharos Drug Target Ontology",
                "Protein Atlas Disease involvement",
                "Human Phenotype Ontology",
            ),
        ),
    )
}

# Annotation dict
# Maps each annotation name to a lowercase identifier string (presumably the
# key of the underlying annotation dataset; confirm against callers).
# The mapping is filled one thematic section at a time, so insertion order
# follows the sections below.
annotation_dict = {}

# Sequence
annotation_dict.update(
    {
        "InterPro Superfamily": "interpro_homologous_superfamily",
        "InterPro Family": "interpro_family",
        "InterPro Domain": "interpro_domain",
        "InterPro Active site": "interpro_active_site",
        "InterPro Binding site": "interpro_binding_site",
        "InterPro Conserved site": "interpro_conserved_site",
        "InterPro Repeat": "interpro_repeat",
        "InterPro PTM": "interpro_ptm",
    }
)

# Functions
annotation_dict.update(
    {
        "Panther Protein class": "panther_protein_class",
        "Protein Atlas Protein class": "protein_atlas_protein_class",
        "Protein Atlas Molecular function": "protein_atlas_molecular_function",
        "GO Molecular function": "msigdb_gomf",
    }
)

# Pathways
annotation_dict.update(
    {
        "Reactome Pathways": "msigdb_reactome",
        "KEGG Pathways": "msigdb_kegg",
        "WikiPathways": "msigdb_wp",
        "Protein Atlas Biological process": "protein_atlas_biological_process",
        "GO Biological process": "msigdb_gobp",
    }
)

# Localization
annotation_dict.update(
    {
        # NOTE(review): the original comment here was just "_main?" --
        # presumably asking whether a "_main" variant should replace "_all".
        "Protein Atlas Subcellular": "protein_atlas_subcellular_location_all",
        "GO Cellular component": "msigdb_gocc",
        "OpenCell Localization": "opencell_localization",
    }
)

# Drugs and Diseases
annotation_dict.update(
    {
        "Pharos IDG Category": "pharos_protein_category",
        "Pharos Drug Target Ontology": "pharos_dto",
        "Protein Atlas Disease involvement": "protein_atlas_disease_involvement",
        "Human Phenotype Ontology": "msigdb_hp",
    }
)

# Universe dict
# Maps each universe name to a lowercase identifier string.
# Note that the two "Bind Degs ..." names map to "cemm_..." identifiers
# rather than to a lowercased copy of the name.
universe_dict = dict(
    (
        ("Human Proteome", "human_proteome"),
        ("HEK293T Core", "hek293t_core"),
        ("Bind Degs Detected", "cemm_detected"),
        ("Bind Degs Enriched", "cemm_enriched"),
        ("Pulldown", "pulldown"),
    )
)