ligdis commited on
Commit
0f91c49
·
verified ·
1 Parent(s): 3482ee0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +578 -0
app.py ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # regular imports
2
+ import os
3
+ import sys
4
+ import csv
5
+ import collections
6
+ import pandas as pd
7
+ import streamlit as st
8
+ import json
9
+ import gc
10
+ import requests
11
+ from PIL import Image
12
+ from io import BytesIO
13
+ from io import StringIO
14
+ from datasets import load_dataset
15
+
16
# Module-level placeholder; reassigned later by the term-exploration view.
proteins_set = None

# Absolute path of the directory containing this script.
ROOT = os.path.abspath(os.path.dirname(__file__))
# TMP = os.path.join(ROOT, "tmp")
# if not os.path.exists(TMP):
#     os.mkdir(TMP)

# NOTE(review): MIN_SET_SIZE and OVERVIEW_PVALUE_CUTOFF appear unused in this
# file — possibly leftovers from an earlier local (non-HF-hosted) version.
MIN_SET_SIZE = 1
PROFILE_TYPE = "Fragment"  # only profile type exposed by this app
OVERVIEW_PVALUE_CUTOFF = 0.05

# relative imports
# sys.path.append(os.path.join(ROOT, "../src/"))
# from util import listdir_util
30
+
31
def listdir_util(path):
    """Yield the entries of *path*, skipping names that start with an underscore."""
    for entry in os.listdir(path):
        if not entry.startswith("_"):
            yield entry
37
+
38
# import metadata
# NOTE(review): proteome_meta is a project-local module; these dicts drive the
# sidebar options in the "Detailed" branch below.
from proteome_meta import task_suf
from proteome_meta import annotation_type_dict
from proteome_meta import annotation_dict
from proteome_meta import universe_dict

# set page layout
st.set_page_config(layout="wide", page_title="Ligand Discovery Protein Set Enrichment Analysis")

# path to results and original data
PATH = os.path.abspath(os.path.join(ROOT, "../results/proteins/"))
DATA = os.path.abspath(os.path.join(ROOT, "../data"))
DATA2 = 'ligdis/data'
mySeparator = "/"  # NOTE(review): appears unused in this file
CACHE = os.path.abspath(os.path.join(ROOT, "../cache"))

# generic inputs

# protein id to gene name: build both directions of the mapping from the
# two-column TSV hosted in the 'ligdis/data' Hugging Face dataset.
dataset = load_dataset('ligdis/data', data_files={"general/pid2name_primary.tsv"}, delimiter='\t')
df = dataset['train'].to_pandas()
pid2name = dict(zip(df.iloc[:, 0], df.iloc[:, 1]))  # column 0 -> column 1
name2pid = dict(zip(df.iloc[:, 1], df.iloc[:, 0]))  # column 1 -> column 0
del dataset, df  # free the raw table; only the two dicts are kept
gc.collect()
64
+
65
def pid2gene(x):
    """Map a UniProt accession to its primary gene name, falling back to the input."""
    return pid2name.get(x, x)
70
+
71
+
72
def gene2pid(x):
    """Map a gene name back to its UniProt accession, falling back to the input."""
    return name2pid.get(x, x)
77
+
78
+
79
def pretty_term(x):
    """Title-case a term label and drop a trailing bracketed identifier, if any."""
    titled = x.title()
    if not titled.endswith("]"):
        return titled
    return titled.split(" [")[0]
84
+
85
def hf_tsv_2_pandas_df(hf_repo, data_file, myHeader):
    """Download a TSV file from a Hugging Face dataset repo into a DataFrame.

    Parameters:
        hf_repo: dataset repo id, e.g. "ligdis/cache_overview".
        data_file: path of the TSV file inside the repo.
        myHeader: forwarded to pandas.read_csv(header=...) (0 or None here).

    Returns:
        The parsed DataFrame, or an empty DataFrame (with an error message
        rendered via Streamlit) when the download fails.

    Fix: requests.get can raise (DNS failure, timeout, connection reset);
    previously that exception propagated and crashed the app instead of
    taking the existing error branch.
    """
    url = '/'.join(("https://huggingface.co/datasets", hf_repo, "resolve/main", data_file))
    try:
        response = requests.get(url)
    except requests.RequestException:
        response = None

    if response is not None and response.status_code == 200:
        tsv_data = StringIO(response.text)  # treat the response body as a file-like object
        df = pd.read_csv(tsv_data, sep='\t', header=myHeader)
    else:
        df = pd.DataFrame()
        st.write("Error loading dataset from hf_repo: ", hf_repo, " and data_file: ", data_file)
    return df
97
+
98
def load_hf_json(json_url):
    """Fetch and parse a JSON file from *json_url*.

    Returns the decoded JSON object, or None when the request does not
    return HTTP 200 (a message is printed in that case).

    Fix: `out` was previously only assigned on the success path, so a
    failed request raised UnboundLocalError at `return out` instead of
    reporting the HTTP status.
    """
    out = None
    response = requests.get(json_url)
    if response.status_code == 200:
        out = response.json()
    else:
        print("Failed to retrieve ", json_url, " file. HTTP Status Code: ", response.status_code)
    return out
105
+
106
def load_hf_image(image_url):
    """Fetch an image from *image_url* and open it with PIL.

    Returns a PIL Image, or None when the request does not return HTTP 200
    (a message is printed in that case).

    Fix: `img` was previously only assigned on the success path, so a
    failed request raised UnboundLocalError at `return img` instead of
    reporting the HTTP status.
    """
    img = None
    response = requests.get(image_url)
    if response.status_code == 200:
        img = Image.open(BytesIO(response.content))
    else:
        print("Failed to retrieve image. HTTP Status Code:", response.status_code)
    return img
113
+
114
+
115
# side bar

st.sidebar.title("Ligand Discovery Proteome Set Enrichment Analysis")

# signatures (aka profiles)
st.sidebar.header("Select a fragment")

profile_type = PROFILE_TYPE
profile_type_subfolder = profile_type.lower()  # used to build HF repo paths below

# @st.cache_data
# def get_sorted_fids():
#     fids = []
#     for fid in listdir_util(os.path.join(DATA, "signatures", "proteins", "fragment")):
#         fids += [fid]
#     fids = sorted(fids)
#     return fids

# Fragment identifiers come from a plain-text file, one id per line.
# NOTE(review): "fid.txt" is resolved relative to the working directory,
# not ROOT — confirm this is intentional for the deployment environment.
with open("fid.txt", "r") as file:
    lines = file.readlines()
    # Remove the newline characters (\n) from each line
    fids = [line.strip() for line in lines]

# fids = get_sorted_fids()
profile = st.sidebar.selectbox("Fragment identifier", options=fids)
profile_subfolder = profile
all_cases = fids
draw_fragment = True  # NOTE(review): appears unused in this file

st.sidebar.header("Choose a type of analysis")

type_of_analysis = st.sidebar.radio(
    "Type of analysis", options=["Overview", "Detailed"]
)
149
+
150
# OVERVIEW TYPE OF ANALYSIS

if type_of_analysis == "Overview":

    st.header("Enrichment overview for {0} {1}".format(profile_type.lower(), profile))
    view = st.sidebar.radio("Select View", options=["Table", "Plot"])

    # Precomputed overview table for this fragment, one TSV per fragment in
    # the 'ligdis/cache_overview' Hugging Face dataset.
    df = hf_tsv_2_pandas_df(hf_repo="ligdis/cache_overview", data_file="{0}.tsv".format(profile), myHeader=0)

    # df = pd.read_csv(os.path.join(CACHE, "overview", "{0}.tsv".format(profile)), sep="\t")

    if view == "Table":

        columns = st.columns(4)

        # Index table rows by each protein in the leading edge ("edge" is a
        # comma-separated list of UniProt ACs, shown as gene names).
        prot2idx = collections.defaultdict(list)
        for i,r in enumerate(list(df["edge"])):
            for x in r.split(","):
                gn = pid2gene(x)
                prot2idx[gn] += [i]
        all_proteins_ = sorted(prot2idx.keys())
        # Index rows by annotation term.
        ann2idx = collections.defaultdict(list)
        for i,r in enumerate(df["term"]):
            ann2idx[r] += [i]
        all_annotations_ = sorted(ann2idx.keys())

        # Index rows by annotation type and subtype.
        type2idx = collections.defaultdict(list)
        for i,r in enumerate(list(df["type"])):
            type2idx[r] += [i]
        all_types_ = sorted(type2idx.keys())

        subtype2idx = collections.defaultdict(list)
        for i,r in enumerate(list(df["subtype"])):
            subtype2idx[r] += [i]
        all_subtypes_ = sorted(subtype2idx.keys())

        # One multiselect filter per index built above.
        selected_proteins = columns[0].multiselect("Filter by proteins in leading edge ({0} unique proteins)".format(len(all_proteins_)), options=all_proteins_)
        selected_annotations = columns[1].multiselect("Select annotations", options=all_annotations_)
        selected_subtypes = columns[2].multiselect("Filter by annotation subtype", options=all_subtypes_)
        selected_types = columns[3].multiselect("Filter by annotation type", options=all_types_)

        # Union of row indices matched by any selected filter value.
        # NOTE(review): st.multiselect returns a list (never None), so these
        # `is not None` guards look always-true; empty selections simply
        # contribute no indices — confirm against the Streamlit version used.
        keep_idxs = []
        if selected_proteins is not None:
            for x in selected_proteins:
                for idx in prot2idx[x]:
                    keep_idxs += [idx]

        if selected_annotations is not None:
            for x in selected_annotations:
                for idx in ann2idx[x]:
                    keep_idxs += [idx]

        if selected_subtypes is not None:
            for x in selected_subtypes:
                for idx in subtype2idx[x]:
                    keep_idxs += [idx]

        if selected_types is not None:
            for x in selected_types:
                for idx in type2idx[x]:
                    keep_idxs += [idx]

        # Only subset the table when at least one filter matched something.
        if keep_idxs:
            keep_idxs = sorted(set(keep_idxs))
            df = df.iloc[keep_idxs]

        # Human-readable, space-separated gene names for each leading edge.
        df["edge_genes"] = [" ".join([pid2gene(x) for x in r.split(",")]) for r in list(df["edge"])]

        df_view = df[["term", "overlap", "setsize", "score", "pval", "edge_genes", "subtype", "type"]]
        df_view = df_view.rename(columns = {
            "term": "Term",
            "overlap": "Edge size",
            "setsize": "Set size",
            "score": "Score",
            "pval": "P-value",
            "edge_genes": "Leading edge",
            "subtype": "Category subtype",
            "type": "Category type"
        })
        # 1-based rank column reflecting the (already sorted) cached order.
        df_view["rank"] = [i+1 for i in range(df_view.shape[0])]
        df_view = df_view.set_index("rank")

        st.dataframe(df_view.reset_index(drop=True), height=2000)

    else:
        # "Plot" view: show the precomputed overview figure from the cache repo.
        # st.image(os.path.join(CACHE, "overview", "{0}.png".format(profile)))
        image_url = ''.join(("https://huggingface.co/datasets/ligdis/cache_overview/resolve/main/", "{0}.png".format(profile), "?download=true"))
        st.image(image_url)
238
+
239
## DETAILED TYPE OF ANALYSIS

else:

    def annotations_selector():
        """Render sidebar widgets for picking an annotation category.

        Returns:
            (annotation, annotation_subfolder, annotation_type, annotations):
            the chosen source, its results subfolder (from annotation_dict),
            the chosen type, and all sources available for that type.
        """
        st.sidebar.header("Select protein annotation category")

        annotation_types = [
            "Sequence",
            "Functions",
            "Processes and pathways",
            "Localization",
            "Drugs and Diseases",
        ]
        annotation_type = st.sidebar.radio("Type of annotation", annotation_types)

        # Sources available for the chosen type (from proteome_meta).
        annotations = annotation_type_dict[annotation_type]

        annotation = st.sidebar.selectbox("Annotation source", options=annotations)
        annotation_subfolder = annotation_dict[annotation]

        return annotation, annotation_subfolder, annotation_type, annotations

    def universe_selector():
        """Return the background universe (fixed to "HEK293T Core") and its subfolder."""
        preselected="HEK293T Core"
        universe = preselected
        universe_subfolder = universe_dict[universe]
        return universe, universe_subfolder
267
+
268
    annotation, annotation_subfolder, annotation_type, annotations = (
        annotations_selector()
    )

    universe, universe_subfolder = universe_selector()

    st.header("Fragment: {0} & Category: {2} ({1})".format(profile_subfolder, annotation_type, annotation))

    # Base URL of the per-fragment cache repo on Hugging Face, e.g.
    # https://huggingface.co/datasets/ligdis/cache_detailed_<fragment>/resolve/main/<annotation>
    # cache_folder = os.path.join(CACHE, "detailed", profile_subfolder, annotation_subfolder)
    cache_folder = '/'.join(("https://huggingface.co/datasets/ligdis", '_'.join(("cache_detailed", profile_subfolder)), "resolve/main", annotation_subfolder ))

    # read metrics

    metrics_json_url = '/'.join((cache_folder, "metrics.json"))
    metrics = load_hf_json(metrics_json_url)

    # with open(os.path.join(cache_folder, "metrics.json"), "r") as f:
    #     metrics = json.load(f)

    # Three headline metrics: profile size, number of categories, and a
    # precomputed summary value (title/value come from metrics.json).
    metric_cols = st.columns(3)
    metric_cols[0].metric(
        "{0} profile: {1}".format(profile_type, profile),
        value="{0} proteins".format(metrics["signature_size"]),
    )
    metric_cols[1].metric(
        "{0}: {1}".format(annotation_type, annotation),
        value="{0} categories".format(metrics["annotations_size"]),
    )
    metric_cols[2].metric(metrics["title"], value=round(metrics["value"], 2))

    columns = st.columns(6)
    view = columns[0].radio("View", options=["Tables", "Basic plots", "Advanced plots"])
300
+
301
+ if view == "Tables":
302
+
303
+ p_value_cutoff = columns[2].number_input("P-value cutoff", value=0.05, min_value=0., max_value=1., format="%.3f")
304
+ min_edge_size = columns[3].number_input("Minimum leading edge size", value=5, min_value=0, max_value=10000)
305
+ max_edge_size = columns[4].number_input("Maximum leading edge size", value=5000, min_value=1, max_value=10000)
306
+ protein_label = "Gene Name"
307
+ if protein_label == "Gene Name":
308
+ convert_to_gene = True
309
+ else:
310
+ convert_to_gene = False
311
+
312
+ # available_selections = json.load(open(os.path.join(cache_folder, "selections.json"), "r"))
313
+ selections_json_url = '/'.join((cache_folder, "selections.json"))
314
+ available_selections = load_hf_json(selections_json_url)
315
+
316
+ all_annotations = available_selections["all_annotations"]
317
+ available_proteins = available_selections["available_proteins"]
318
+
319
+ select_columns = st.columns(3)
320
+ selected_annotations = select_columns[2].multiselect(
321
+ "Select annotation categories", options=available_proteins
322
+ )
323
+
324
+ selected_proteins = select_columns[0].multiselect(
325
+ "Filter by proteins found in at least one annotation term ({0})".format(
326
+ len(available_proteins)
327
+ ),
328
+ options=available_proteins,
329
+ )
330
+
331
+ task_filename = ''.join((profile, "_val_log2fc.tsv"))
332
+
333
+ ligdis_annotations_repo = '/'.join(('ligdis', annotation_subfolder))
334
+ annotations_json = '/'.join((profile_type_subfolder, profile_subfolder, task_filename.split(".tsv")[0], 'annotations.json'))
335
+ annotations_json_url = ''.join(("https://huggingface.co/datasets/", ligdis_annotations_repo, "/resolve/main/", annotations_json))
336
+
337
+ annotations_ = load_hf_json(annotations_json_url)
338
+
339
+ if selected_proteins:
340
+
341
+ if convert_to_gene:
342
+ selected_proteins = [gene2pid(x) for x in selected_proteins]
343
+ selected_proteins = set(selected_proteins)
344
+ if not selected_annotations:
345
+ for k, v in annotations_.items():
346
+ if len(selected_proteins.intersection(v)) > 0:
347
+ selected_annotations += [k]
348
+ if not selected_annotations:
349
+ st.warning(
350
+ "No available annotations for any of your proteins of interest..."
351
+ )
352
+
353
+ # result = pd.read_csv(os.path.join(cache_folder, "result.tsv"), sep="\t")
354
+
355
+ ligdis_cache_detailed_fragment_repo = '_'.join(("ligdis/cache_detailed", profile_subfolder))
356
+ result_file = '/'.join((annotation_subfolder, "result.tsv"))
357
+
358
+ result = hf_tsv_2_pandas_df(hf_repo = ligdis_cache_detailed_fragment_repo, data_file = result_file, myHeader=0)
359
+
360
+ result = result[result["leading_edge_size"] >= min_edge_size]
361
+ result = result[result["leading_edge_size"] <= max_edge_size]
362
+ result = result.reset_index(drop=True)
363
+
364
+ leading_proteins = available_selections["leading_proteins"]
365
+
366
+ selected_leading_proteins = select_columns[1].multiselect(
367
+ "Filter by proteins found in at least one leading edge",
368
+ options = leading_proteins)
369
+
370
+ if selected_leading_proteins:
371
+
372
+ prot2idx = collections.defaultdict(list)
373
+ for i, r in enumerate(list(result["leading_edge"])):
374
+ if str(r) == "nan":
375
+ continue
376
+ for x in r.split(","):
377
+ prot2idx[pid2gene(x)] += [i]
378
+
379
+ idxs = []
380
+ for v in selected_leading_proteins:
381
+ for x in prot2idx[v]:
382
+ idxs += [x]
383
+ idxs = sorted(set(idxs))
384
+ result = result.iloc[idxs]
385
+
386
+ # df_merge = pd.read_csv(os.path.join(cache_folder, "df_merge.tsv"), sep="\t")
387
+ df_merge_file = '/'.join((annotation_subfolder, "df_merge.tsv"))
388
+ df_merge = hf_tsv_2_pandas_df(hf_repo=ligdis_cache_detailed_fragment_repo, data_file=df_merge_file, myHeader=0)
389
+
390
+ type_of_task = metrics["type_of_task"]
391
+ if type_of_task == "ranksum":
392
+
393
+ sort_by = "NES"
394
+ if sort_by == "NES":
395
+ sort_by_nes = True
396
+ else:
397
+ sort_by_nes = False
398
+
399
+ direction = "Up"
400
+ if direction == "Up":
401
+ is_up = True
402
+ else:
403
+ is_up = False
404
+
405
+ df = result.copy()
406
+ df = df.rename(columns = {"Term": "term"})
407
+
408
+ df_merge = df_merge[["term", "score_mean"]]
409
+
410
+ df = df.merge(df_merge, how="left", on="term")
411
+
412
+ df = df[df["leading_edge"].notnull()]
413
+
414
+ df["edge_genes"] = [" ".join([pid2gene(x) for x in r.split(",")]) for r in list(df["leading_edge"])]
415
+
416
+ df = df[["term","leading_edge_size", "geneset_size", "nes", "pval", "fdr", "score_mean", "edge_genes", "leading_edge"]]
417
+
418
+ if selected_annotations:
419
+ df = df[df["term"].isin(selected_annotations)]
420
+
421
+ if is_up:
422
+ df = df[df["nes"] >= 0]
423
+ else:
424
+ df = df[df["nes"] < 0]
425
+ if sort_by_nes:
426
+ if is_up:
427
+ df = df.sort_values(by="nes", ascending=False)
428
+ else:
429
+ df = df.sort_values(by="nes", ascending=True)
430
+ else:
431
+ df = df.sort_values(by="pval")
432
+
433
+ df = df.reset_index(drop=True)
434
+
435
+ df = df.rename(columns = {
436
+ "term": "Term",
437
+ "leading_edge_size": "Edge size",
438
+ "geneset_size": "Set size",
439
+ "nes": "Score",
440
+ "pval": "P-value",
441
+ "fdr": "FDR",
442
+ "score_mean": "Mean score",
443
+ "edge_genes": "Leading edge",
444
+ })
445
+
446
+ st.dataframe(df[[c for c in list(df.columns)[:-1] if c != "Mean score"]].reset_index(drop=True))
447
+
448
+ term = st.selectbox("Explore term...", df["Term"])
449
+
450
+ if term is not None:
451
+
452
+ # signature_ori = pd.read_csv(os.path.join(results_path, "signature.tsv"), delimiter="\t", header=None)
453
+ ligdis_ontology_repo = '/'.join(("ligdis", annotation_subfolder))
454
+ ontology_signature_file = '/'.join((profile_type_subfolder, profile_subfolder, task_filename.split(".tsv")[0], "signature.tsv"))
455
+ signature_ = hf_tsv_2_pandas_df(hf_repo=ligdis_ontology_repo, data_file=ontology_signature_file, myHeader=None )
456
+
457
+ # signature_file = os.path.abspath(os.path.join(DATA,"signatures","proteins",profile_type_subfolder,profile_subfolder,task_filename))
458
+ ligdis_data_repo = '/'.join(("ligdis", "data"))
459
+ fragment_signature_file = '/'.join(("signatures/proteins/fragment", profile_subfolder, task_filename))
460
+
461
+ # Explore term
462
+
463
+ t_values = {}
464
+ for r in signature_.values:
465
+ t_values[r[0]] = r[1]
466
+ o_values = {}
467
+ # signature_original = pd.read_csv(signature_file, delimiter="\t", header=None)
468
+ signature_original = hf_tsv_2_pandas_df(hf_repo=ligdis_data_repo, data_file=fragment_signature_file, myHeader=None)
469
+
470
+ for r in signature_original.values:
471
+ o_values[r[0]] = r[1]
472
+
473
+ cols = st.columns([0.15, 1])
474
+
475
+ col = cols[0]
476
+
477
+ annotations_size = len(annotations_[term])
478
+ signature_size = len(signature_)
479
+
480
+ df_filt = df[df["Term"] == term]
481
+ leading_edge = list(df_filt["leading_edge"])[0]
482
+ if str(leading_edge) == "nan":
483
+ leading_edge = []
484
+ else:
485
+ leading_edge = leading_edge.split(",")
486
+ display_proteins = col.radio(
487
+ "Display proteins",
488
+ [
489
+ "Leading edge ({0})".format(len(leading_edge)),
490
+ "In category ({0})".format(annotations_size),
491
+ "Full profile ({0})".format(signature_size),
492
+ ],
493
+ )
494
+ if "Leading" in display_proteins:
495
+ proteins = leading_edge
496
+ elif "category" in display_proteins:
497
+ proteins = annotations_[term]
498
+ else:
499
+ proteins = signature_[0]
500
+ o_values = [o_values[pid] for pid in proteins]
501
+ t_values = [t_values[pid] for pid in proteins]
502
+
503
+ proteins_set = set(proteins)
504
+ if convert_to_gene:
505
+ genes = [pid2gene(x) for x in proteins]
506
+ label = "Gene Name"
507
+ else:
508
+ label = "UniProtAC"
509
+ dl = pd.DataFrame(
510
+ {"Gene Name": genes, "UniProt AC": proteins, "Log2FC": o_values, "Z-score": t_values}
511
+ )
512
+
513
+ sort_by = col.radio(
514
+ "Sort proteins", ["By Z-score", "Alphabetically"]
515
+ )
516
+ if sort_by != "Alphabetically":
517
+ if is_up:
518
+ dl = dl.sort_values("Z-score", ascending=False)
519
+ else:
520
+ dl = dl.sort_values("Z-score", ascending=True)
521
+ else:
522
+ dl = dl.sort_values(label)
523
+ dl = dl.reset_index(drop=True)
524
+
525
+ col = cols[1]
526
+ col.dataframe(dl.reset_index(drop=True))
527
+
528
+ if view == "Basic plots":
529
+ top_plots_number = columns[1].number_input("Maximum number of plots", value=12, min_value=1, max_value=50)
530
+ plot_columns = st.columns(4)
531
+
532
+ # with open(os.path.join(cache_folder, "basic", "idx2term.json"), "r") as f:
533
+ # idx2term = json.load(f)
534
+ idx2term_json_url = '/'.join((cache_folder, "basic", "idx2term.json"))
535
+ idx2term = load_hf_json(idx2term_json_url)
536
+
537
+ idxs = [i for i in range(len(idx2term))]
538
+
539
+ i = 0
540
+ j = 0
541
+
542
+ for idx in idxs:
543
+
544
+ if i == len(plot_columns):
545
+ i = 0
546
+ col = plot_columns[i]
547
+
548
+ if j == top_plots_number:
549
+ break
550
+
551
+ # col.image(os.path.join(cache_folder, "basic", "plot_{0}.png".format(idx)))
552
+
553
+ image_url = '/'.join((cache_folder, "basic", "plot_{0}.png".format(idx)))
554
+ col.image(image_url) # Show the image
555
+ i += 1
556
+ j += 1
557
+
558
+
559
+ if view == "Advanced plots":
560
+ top_plots_number = columns[1].number_input("Maximum number of plots", value=5, min_value=1, max_value=10)
561
+
562
+ # with open(os.path.join(cache_folder, "advanced", "idx2term.json"), "r") as f:
563
+ # idx2term = json.load(f)
564
+
565
+ idx2term_json_url = '/'.join((cache_folder, "advanced", "idx2term.json"))
566
+ idx2term = load_hf_json(idx2term_json_url)
567
+
568
+ idxs = [i for i in range(len(idx2term))]
569
+
570
+ j = 0
571
+ for idx in idxs:
572
+ if j == top_plots_number:
573
+ break
574
+
575
+ # st.image(os.path.join(cache_folder, "advanced", "plot_{0}.png".format(idx)))
576
+ image_url = '/'.join((cache_folder, "advanced", "plot_{0}.png".format(idx)))
577
+ st.image(image_url) # Show the image
578
+ j += 1