lhoestq HF staff committed on
Commit
aa98a64
·
1 Parent(s): c796372

use 10k samples

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -4,10 +4,12 @@ import gradio as gr
4
  con = duckdb.connect(":memory:")
5
 
6
  def greet(SQL_Query):
 
7
  return con.sql(SQL_Query).df()
8
 
9
  examples = [
10
- "SELECT dump, avg(token_count) FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' GROUP BY dump;",
11
  ]
12
- demo = gr.Interface(fn=greet, inputs="text", outputs="dataframe", examples=examples)
 
13
  demo.launch()
 
4
  con = duckdb.connect(":memory:")
5
 
6
  def greet(SQL_Query):
7
+ con.sql("CREATE TABLE IF NOT EXISTS fineweb_10k_samples AS SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000;")
8
  return con.sql(SQL_Query).df()
9
 
10
  examples = [
11
+ "SELECT dump, avg(token_count) FROM fineweb_10k_samples GROUP BY dump;",
12
  ]
13
+ description = "Run SQL queries on the FineWeb dataset"
14
+ demo = gr.Interface(fn=greet, inputs="text", outputs="dataframe", examples=examples, cache_examples=False, description=description)
15
  demo.launch()