Hack90 committed on
Commit
cacb9ec
·
verified ·
1 Parent(s): fdebd4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -23,6 +23,11 @@ mpl.rcParams.update(mpl.rcParamsDefault)
23
  df = pd.read_parquet('virus_ds.parquet')
24
  virus = df['Organism_Name'].unique()
25
  virus = {v: v for v in virus}
 
 
 
 
 
26
  loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
27
  model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
28
  param_typesss = pd.read_csv("training_data_5.csv")['param_type'].unique().tolist()
@@ -76,17 +81,20 @@ with ui.navset_card_tab(id="tab"):
76
  ui.panel_title("How does sequence distribution vary across sequence length?")
77
  with ui.layout_columns():
78
  with ui.card():
79
- ui.input_selectize("virus_selector_1", "Select your viruses:", virus, multiple=True, selected=None)
80
  with ui.card():
81
  ui.input_slider(
82
- "basepair","Select basepair",0, 1000, 15
83
  )
84
 
85
  @render.plot()
86
  def plot_distro():
87
- df = pd.read_parquet("virus_ds.parquet")
88
- df = df[df["Organism_Name"].isin(input.virus_selector_1())]
89
- grouped = df.groupby("Organism_Name")["Sequence"].apply(list)
 
 
 
90
  return plot_distrobutions(grouped, grouped.index, input.basepair())
91
 
92
  with ui.nav_panel("Viral Microstructure"):
 
23
  df = pd.read_parquet('virus_ds.parquet')
24
  virus = df['Organism_Name'].unique()
25
  virus = {v: v for v in virus}
26
+ df_new = pd.read_parquet("virus.parquet")
27
+ df_new = df_new.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
28
+ filter_species = df_new.organism_name.value_counts().reset_index()[df_new.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
29
+ del df_new
30
+ virus_new = {v: v for v in filter_species}
31
  loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
32
  model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
33
  param_typesss = pd.read_csv("training_data_5.csv")['param_type'].unique().tolist()
 
81
  ui.panel_title("How does sequence distribution vary across sequence length?")
82
  with ui.layout_columns():
83
  with ui.card():
84
+ ui.input_selectize("virus_selector_1", "Select your viruses:", virus_new, multiple=True, selected=None)
85
  with ui.card():
86
  ui.input_slider(
87
+ "basepair","Select basepair",0, 10000, 15
88
  )
89
 
90
  @render.plot()
91
  def plot_distro():
92
+ df = pd.read_parquet("virus.parquet")
93
+ df = df.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
94
+ filter_species = df.organism_name.value_counts().reset_index()[df.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
95
+
96
+ df = df[df["organism_name"].isin(input.virus_selector_1())]
97
+ grouped = df.groupby("organism_name")["sequence"].apply(list)
98
  return plot_distrobutions(grouped, grouped.index, input.basepair())
99
 
100
  with ui.nav_panel("Viral Microstructure"):