File size: 640 Bytes
fc95975
4f5af5a
 
fc95975
f622ed0
c7feb0b
4f5af5a
 
 
 
 
f622ed0
4f5af5a
 
 
 
 
 
 
 
 
f622ed0
4f5af5a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import streamlit as st
import datasets
from functools import partial


# Load the local JSONL file and take its "train" split.
_full_data = datasets.load_dataset("json", data_files="small_test_data.jsonl")["train"]
# Preview at most the first 100 rows. The upper bound is clamped to the
# dataset length because Dataset.select raises IndexError for indices that
# are out of range, which would crash the app on a file with < 100 rows.
data = _full_data.select(range(min(100, len(_full_data))))

# Interactive thresholds read by the filter_* functions below.
# Float bounds are used on purpose: with integer bounds (0, 1) Streamlit
# renders an integer slider whose only selectable values are 0 and 1, which
# makes a fractional cutoff impossible to choose. The default value remains
# the lower bound, as before.
# NOTE(review): the [0, 1] range is kept from the original; confirm that the
# "ppl" column is actually normalised to that range.
bad_cutoff = st.slider('Bad words cutoff', 0.0, 1.0)
stp_cutoff = st.slider('Stop words cutoff', 0.0, 1.0)
ppl_cutoff = st.slider('ppl cutoff', 0.0, 1.0)


def filter_ppl(examples, invert=False):
    """Build a boolean keep-mask for a batch from its "ppl" values.

    The threshold is read from the module-level ``ppl_cutoff`` slider value
    at call time.

    Args:
        examples: Mapping whose "ppl" entry is an iterable of numeric
            values, one per row (presumably a batch as passed by
            ``Dataset.filter(..., batched=True)``; not called in the
            visible code).
        invert: When false (default), a row is marked True if its value is
            strictly below the cutoff. When true, the mask is flipped so
            the rows that would have been dropped are marked True instead.

    Returns:
        A list with one boolean per value in ``examples["ppl"]``.
    """
    # Normalise once so any truthy/falsy argument behaves like a bool.
    flip = bool(invert)
    # `!=` on two booleans is XOR: it negates the comparison only when flip is set.
    return [(ppl < ppl_cutoff) != flip for ppl in examples["ppl"]]

def filter_bad(examples, invert=False):
    """Build a boolean keep-mask for a batch from its "bad_words" values.

    The threshold is read from the module-level ``bad_cutoff`` slider value
    at call time.

    Args:
        examples: Mapping whose "bad_words" entry is an iterable of numeric
            values, one per row (presumably a batch as passed by
            ``Dataset.filter(..., batched=True)``; not called in the
            visible code).
        invert: When false (default), a row is marked True if its value is
            strictly below the cutoff. When true, the mask is flipped so
            the rows that would have been dropped are marked True instead.

    Returns:
        A list with one boolean per value in ``examples["bad_words"]``.
    """
    # Normalise once so any truthy/falsy argument behaves like a bool.
    flip = bool(invert)
    # `!=` on two booleans is XOR: it negates the comparison only when flip is set.
    return [(bad < bad_cutoff) != flip for bad in examples["bad_words"]]

def filter_stp(examples, invert=False):
    """Build a boolean keep-mask for a batch from its "stop_words" values.

    The threshold is read from the module-level ``stp_cutoff`` slider value
    at call time. Unlike the other filters in this file, the comparison
    here is "strictly greater than".

    Args:
        examples: Mapping whose "stop_words" entry is an iterable of numeric
            values, one per row (presumably a batch as passed by
            ``Dataset.filter(..., batched=True)``; not called in the
            visible code).
        invert: When false (default), a row is marked True if its value is
            strictly above the cutoff. When true, the mask is flipped so
            the rows that would have been dropped are marked True instead.

    Returns:
        A list with one boolean per value in ``examples["stop_words"]``.
    """
    # Normalise once so any truthy/falsy argument behaves like a bool.
    flip = bool(invert)
    # `!=` on two booleans is XOR: it negates the comparison only when flip is set.
    return [(stp > stp_cutoff) != flip for stp in examples["stop_words"]]


# Render the loaded rows as a static table.
# NOTE(review): none of the filter_* functions are applied in the visible
# code, so the slider cutoffs do not currently affect what is shown here;
# confirm whether the filters were meant to be applied before rendering.
st.table(data)