File size: 832 Bytes
317865f
20e02d9
 
 
 
 
317865f
20e02d9
 
 
317865f
20e02d9
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import gradio as gr
import polars as pl 


# Read every parquet shard matching the glob into a single polars DataFrame.
data = pl.read_parquet(
    "hf://datasets/polinaeterna/text_unnested/data/*.parquet",
)
# Smallest and largest values of the "min" column; the sliders below use
# them as their lower and upper limits.
min_min = data.get_column("min").min()
min_max = data.get_column("min").max()


def filter(min_value: float, max_value: float):
    """Return at most 100 rows of ``data`` whose "min" value is within the bounds.

    Args:
        min_value: Inclusive lower bound for the "min" column.
        max_value: Inclusive upper bound for the "min" column.

    Returns:
        A pandas DataFrame with the first 100 matching rows (fewer when
        fewer rows match; empty when nothing matches).
    """
    # NOTE: this name shadows the builtin ``filter``; it is kept because the
    # button click handler in this file refers to the function by this name.
    in_range = (pl.col("min") >= min_value) & (pl.col("min") <= max_value)
    # Truncate while still in polars so only the rows that will be shown are
    # converted to pandas; ``head`` returns the whole frame when it is shorter.
    return data.filter(in_range).head(100).to_pandas()
    

# UI: two sliders bounding the "min" column, a button, and a results table.
with gr.Blocks() as demo:
    gr.Markdown("# 💫 Filter text datasets by string lengths distribution 💫")
    # Start with the full observed range selected. A hard-coded 0 can fall
    # outside [min_min, min_max], and using it for both sliders makes the
    # initial selection a single value.
    min_value = gr.Slider(min_min, min_max, value=min_min, step=1, label="Min min value")
    max_value = gr.Slider(min_min, min_max, value=min_max, step=1, label="Max min value")
    btn = gr.Button("Get datasets ")
    datasets = gr.DataFrame()
    # Clicking the button runs ``filter`` on the two slider values and puts
    # the returned frame into the table.
    btn.click(filter, inputs=[min_value, max_value], outputs=[datasets])

demo.launch(debug=True)