fineweb-sql / app.py
lhoestq's picture
lhoestq HF staff
use 10 samples
aa98a64
raw
history blame
573 Bytes
import duckdb
import gradio as gr
# One in-memory DuckDB connection, created at import time and reused by
# every call to greet(); nothing is written to a database file.
con = duckdb.connect(database=":memory:")
def greet(SQL_Query):
    """Run a user-supplied SQL query against the FineWeb sample table.

    Each call first makes sure the ``fineweb_10k_samples`` table exists,
    creating it from 10,000 rows of the FineWeb ``sample/10BT`` parquet
    files when it is missing. Because this runs on every call, the table
    is also rebuilt if an earlier query dropped it.

    Args:
        SQL_Query: SQL text to execute on the shared ``con`` connection.

    Returns:
        The query result converted with the relation's ``.df()`` method.

    Raises:
        gr.Error: If the text contains no statement, or the statement does
            not produce a result set (for example DDL), so there is
            nothing to display.
    """
    # NOTE(review): the query text is executed verbatim, so callers can run
    # any statement DuckDB accepts, not only SELECTs on the sample table.
    # That appears to be the purpose of this demo -- confirm it is intended.
    con.sql("CREATE TABLE IF NOT EXISTS fineweb_10k_samples AS SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000;")
    relation = con.sql(SQL_Query)
    # con.sql() hands back None instead of a relation when there is no
    # result set; calling .df() on it would fail with an opaque
    # AttributeError, so report a readable error instead.
    if relation is None:
        raise gr.Error(
            "The query did not return a result set. "
            "Enter a statement that returns rows, such as a SELECT."
        )
    return relation.df()
# Text shown in the interface and the sample query offered to the user.
description = "Run SQL queries on the FineWeb dataset"
examples = ["SELECT dump, avg(token_count) FROM fineweb_10k_samples GROUP BY dump;"]

# Wire greet() to a text input and a dataframe output, then start the app.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="dataframe",
    examples=examples,
    cache_examples=False,
    description=description,
)
demo.launch()