Spaces:
Sleeping
Sleeping
| import duckdb | |
| import gradio as gr | |
# One in-memory DuckDB database, created at import time and referenced by the
# query handler below.
con = duckdb.connect(database=":memory:")
def greet(SQL_Query):
    """Run a user-supplied SQL query with DuckDB and return the result.

    Args:
        SQL_Query: SQL text entered in the Gradio textbox. ``None`` is treated
            as an empty query.

    Returns:
        The query result converted to a pandas DataFrame via ``.df()``.

    Raises:
        gr.Error: If the query does not contain ``LIMIT`` as a standalone
            keyword, or if DuckDB fails to parse or execute the query.
    """
    # Local import so this function stays self-contained.
    import re

    query = SQL_Query or ""

    # Require LIMIT as a whole word. A plain substring test is satisfied by
    # identifiers such as "rate_limited" or "limits", which defeats the guard.
    # This is still only a heuristic: the word inside a string literal or a
    # comment will also pass.
    if re.search(r"\blimit\b", query, flags=re.IGNORECASE) is None:
        raise gr.Error("You should use the LIMIT clause or it may take too much time to run your query. For example\n\n\tLIMIT 10000")

    # NOTE(security): this executes arbitrary SQL typed by the user against
    # the shared database. Review before exposing to untrusted users.

    # Use a per-call cursor instead of the shared connection object: DuckDB
    # recommends one cursor per thread, and the UI may invoke this handler
    # for several requests at the same time.
    cursor = con.cursor()
    try:
        return cursor.sql(query).df()
    except duckdb.Error as exc:
        # Surface DuckDB's message in the UI instead of a generic failure.
        raise gr.Error(str(exc)) from exc
    finally:
        cursor.close()
# Parquet files of the FineWeb 10BT sample, addressed with an hf:// path.
_FINEWEB_SAMPLE = "hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet"

# One example query per aggregate; each aggregates token_count per dump over
# the first 10000 rows of the sample.
examples = [
    f"SELECT dump, {aggregate}(token_count) FROM\n(SELECT * FROM '{_FINEWEB_SAMPLE}' LIMIT 10000)\nGROUP BY dump;"
    for aggregate in ("avg", "max", "min")
]

# Custom CSS handed to the interface; it targets an auto-generated element id.
css = "#component-4{display: block;}"
description = "Run SQL queries on the FineWeb dataset"

# Text box in, dataframe out; examples are not pre-computed (cache_examples=False).
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="dataframe",
    examples=examples,
    cache_examples=False,
    description=description,
    css=css,
)
demo.launch()