File size: 1,007 Bytes
c796372
dac1e43
 
c796372
dac1e43
c796372
38e9fcc
 
c796372
 
 
de23bb0
 
 
c796372
de23bb0
aa98a64
de23bb0
dac1e43
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import duckdb
import gradio as gr

# One DuckDB connection, opened against an in-memory database when the module
# is loaded; greet() runs every submitted query through it.
con = duckdb.connect(database=":memory:")

def greet(SQL_Query):
    """Run a user-supplied SQL query through DuckDB and return the result.

    Args:
        SQL_Query: SQL text submitted from the Gradio text input.

    Returns:
        The query result converted with ``.df()``.

    Raises:
        gr.Error: If the query contains no standalone ``LIMIT`` keyword, or
            if DuckDB raises an error while running the query.
    """
    import re  # function-scope import keeps this block self-contained

    # A missing value is treated like an empty query so it hits the LIMIT
    # guard below instead of failing on a None attribute access.
    query = SQL_Query or ""

    # Match LIMIT as a whole word: a plain substring test is also satisfied
    # by identifiers such as "rate_limit" or "unlimited".
    if re.search(r"\blimit\b", query, flags=re.IGNORECASE) is None:
        raise gr.Error("You should use the LIMIT clause or it may take too much time to run your query. For example\n\n\tLIMIT 10000")

    # NOTE(review): the query text is executed verbatim; this guard only
    # checks for the keyword and does not restrict what the SQL may do.
    try:
        return con.sql(query).df()
    except duckdb.Error as exc:
        # Re-raise as gr.Error so the DuckDB message is shown to the user.
        raise gr.Error(str(exc)) from exc

# Text shown as the interface description.
description = "Run SQL queries on the FineWeb dataset"

# Custom CSS handed to the interface.
css = "#component-4{display: block;}"

# Example queries: the same grouped aggregate over a 10000-row sample,
# once per aggregate function.
examples = [
    f"SELECT dump, {aggregate}(token_count) FROM\n"
    "(SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000)\n"
    "GROUP BY dump;"
    for aggregate in ("avg", "max", "min")
]
# Wire greet() into a text-in / dataframe-out interface. Example caching is
# disabled via cache_examples=False.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="dataframe",
    examples=examples,
    cache_examples=False,
    description=description,
    css=css,
)

# Launch the app when the module is executed.
demo.launch()