# Finetune DeepSeek Coder 1.3B for NBA + Tennis Kaggle Databases SQLite Generation

In [1]:
import pandas as pd
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig, EarlyStoppingCallback, PreTrainedTokenizer
from torch.utils.data import DataLoader
import sys
from peft import LoraConfig, get_peft_model, TaskType
from huggingface_hub import snapshot_download
import os
import re
import contextlib #helps make pip silent
import sys
import os
import numpy as np
with contextlib.redirect_stdout(sys.__stdout__), contextlib.redirect_stderr(sys.__stderr__):
 %pip install datasets

 from .autonotebook import tqdm as notebook_tqdm





# Define constants for using google colab or local runs

In [2]:
is_google_colab = False
use_bnb = True

# Establish read and write paths

In [3]:
current_read_path = "./"
current_write_path = "./"

def read_path(rel_path):
 return os.path.join(current_read_path, rel_path)

def write_path(rel_path):
 return os.path.join(current_write_path, rel_path)

if is_google_colab:
 from google.colab import drive
 drive.mount('/content/drive')
 current_write_path = "/content/drive/MyDrive/sql_gen"

 hugging_face_path = snapshot_download(
 repo_id="USC-Applied-NLP-Group/SQL-Generation",
 repo_type="model",
 allow_patterns=["train-data/*", "deepseek-coder-1.3b-instruct/*"], 
 )
 sys.path.append(hugging_face_path)
 current_read_path = hugging_face_path

## First define prompt

In [4]:
from utils.prompts.nba_prompt import input_text as nba_prompt
from utils.prompts.tennis_prompt import input_text as tennis_prompt

print(len(nba_prompt))
print(len(tennis_prompt))

9035
7990


## Load data and convert to Dataset object tokenized by the DeepSeek model

In [5]:
# Model output directories
MODEL_DIR = write_path("finetuned-model-16-full")
VAL_OUTPUT = write_path("val-16-full.hf")

# Load dataset
df_train = pd.read_csv(read_path("training-data/combined_full_dataset.tsv"), sep='\t')
df_test = pd.read_csv(read_path("training-data/test_set.tsv"), sep='\t')

# Fix any spacing issues
df_train.applymap(lambda x: re.sub(r'\s+', ' ', x) if isinstance(x, str) else x)
df_test.applymap(lambda x: re.sub(r'\s+', ' ', x) if isinstance(x, str) else x)

# Display dataset info
print(f"Total train dataset examples: {len(df_train)}")
print(df_train.head())
print(f"Total test dataset examples: {len(df_test)}")
print(df_test.head())
# Load tokenizer
model_name = read_path("deepseek-coder-1.3b-instruct")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Enable 8-bit quantization for lower memory usage
bnb_config = None
if use_bnb:
 bnb_config = BitsAndBytesConfig(
 load_in_8bit=True, 
 bnb_8bit_compute_dtype=torch.float16
 )

# Load model with quantization
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
model = AutoModelForCausalLM.from_pretrained(
 model_name, 
 quantization_config=bnb_config,
 device_map=device
)

# Add a custom stop token (can be anything that won’t show up in your data)
special_token = "<|endofsql|>"

# Only add if it doesn’t already exist
print("adding special token")
print(len(tokenizer))
tokenizer.add_special_tokens({"additional_special_tokens": [special_token]})
tokenizer.eos_token = special_token
model.resize_token_embeddings(len(tokenizer))
print(len(tokenizer))

tokenizer.truncation_side = "left"

def format_deepseek_chat(example, tokenizer, special_token="<|endofsql|>"):
 # Manually build the prompt as one flat string
 if example['is_nba']:
 prompt = f"{nba_prompt}{example['natural_query']}\n"
 else:
 prompt = f"{tennis_prompt}{example['natural_query']}\n"

 completion = f"SQLite:\n{example['sql_query']}{special_token}"

 full_text = prompt + completion
 tokenized = tokenizer(
 full_text,
 truncation=True,
 padding="max_length",
 max_length=3156, # or whatever your model can handle
 )

 # Mask out prompt tokens in the labels
 prompt_len = len(tokenizer(prompt, truncation=True)["input_ids"])
 labels = tokenized["input_ids"][:]
 labels[:prompt_len] = [-100] * prompt_len
 tokenized["labels"] = labels

 return tokenized

# Build dataset dict
train_dataset_dict = {
 "natural_query": df_train["natural_query"].tolist(),
 "sql_query": df_train["sql_query"].tolist(),
 "is_nba": df_train["is_nba"].tolist(),
}


val_dataset_dict = {
 "natural_query": df_test["natural_query"].tolist(),
 "sql_query": df_test["sql_query"].tolist(),
 "is_nba": df_test["is_nba"].tolist(),
}

# Create HuggingFace Dataset
train_dataset = Dataset.from_dict(train_dataset_dict)
val_dataset = Dataset.from_dict(val_dataset_dict)

# Apply formatting
train_dataset = train_dataset.map(
 lambda x: format_deepseek_chat(x, tokenizer),
 remove_columns=["natural_query", "sql_query"]
)

val_dataset = val_dataset.map(
 lambda x: format_deepseek_chat(x, tokenizer),
 remove_columns=["natural_query", "sql_query"]
)

del df_train, df_test, train_dataset_dict, val_dataset_dict


for v in val_dataset:
 print(v)
 print(len(v['input_ids']))
 break


 df_train.applymap(lambda x: re.sub(r'\s+', ' ', x) if isinstance(x, str) else x)
 df_test.applymap(lambda x: re.sub(r'\s+', ' ', x) if isinstance(x, str) else x)


Total train dataset examples: 1014
 natural_query \
0 How many matches were played at Wimbledon in 2... 
1 How many US Open matches has Novak Djokovic pa... 
2 List all matches won by Cameron Norrie. 
3 What is the highest number of personal fouls c... 
4 What is the average points scored by the San A... 

 sql_query result is_nba 
0 SELECT COUNT(*) FROM matches WHERE tourney_n... 239 False 
1 SELECT COUNT(*) FROM matches WHERE tourney_nam... 84 False 
2 SELECT tourney_name FROM matches WHERE winner_... NaN False 
3 SELECT MAX(pf_away) as max_pf FROM game WHERE ... 41 True 
4 SELECT AVG(pts_away) as avg_points FROM game ... 102.35 True 
Total test dataset examples: 249
 natural_query \
0 How many spanish (ESP) players are there? 
1 How many distinct players appear in the rankin... 
2 How many times did the Los Angeles Clippers lo... 
3 How many times have the Boston Celtics won an ... 
4 Show the most successful player by win count 

 sql_query result \
0 SELECT COUNT(*) AS spanish_pla

Map: 100%|██████████| 1014/1014 [00:20<00:00, 49.27 examples/s]
Map: 100%|██████████| 249/249 [00:05<00:00, 47.50 examples/s]

{'is_nba': False, 'input_ids': [32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32




## Load model and define training arguments

In [None]:
# Define LoRA configuration
lora_config = LoraConfig(
 r=16, # Rank of LoRA matrices (adjust for memory vs. accuracy)
 lora_alpha=32, # Scaling factor
 lora_dropout=0.0, # Dropout for regularization
 bias="none",
 task_type=TaskType.CAUSAL_LM,
 target_modules=[
 "q_proj",
 "k_proj",
 "v_proj",
 "o_proj",
 "gate_proj",
 "up_proj",
 "down_proj"
 ]
)

# Wrap model with LoRA adapters
model = get_peft_model(model, lora_config)
model = model.to(device)
model.print_trainable_parameters() # Show trainable parameters count

## Setup model trainer

In [None]:
training_args = TrainingArguments(
 output_dir=MODEL_DIR,
 evaluation_strategy="epoch", # Evaluate at the end of each epoch
 save_strategy="epoch", # Save model every epoch
 per_device_train_batch_size=1, 
 per_device_eval_batch_size=1,
 gradient_accumulation_steps=16,
 num_train_epochs=10, # Increase if needed
 learning_rate=5e-5, # Higher LR since we're only training LoRA layers
 weight_decay=0.001,
 logging_steps=50, # Print loss every 50 steps
 save_total_limit=2, # Keep last 2 checkpoints
 bf16=True if torch.cuda.is_available() else False,
 push_to_hub=False,
 load_best_model_at_end=True,
 metric_for_best_model="eval_loss",
 greater_is_better=False
)

# Trainer setup
trainer = Trainer(
 model=model,
 args=training_args,
 train_dataset=train_dataset,
 eval_dataset=val_dataset,
 tokenizer=tokenizer,
 callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

## Run fine-tuning and save model weights when complete

In [None]:
# Run training
trainer.train()

# Merge LoRA adapters with the base model before saving
model = model.merge_and_unload()
model.save_pretrained(MODEL_DIR)
tokenizer.save_pretrained(MODEL_DIR)

## Try inference using fine-tuned model

In [6]:
model = AutoModelForCausalLM.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# Prepare query with the same prompt
input_text = "How many points do the Los Angeles Lakers average at home?"
message = [{'role': 'user', 'content': nba_prompt + input_text}]
inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors="pt").to(model.device)

# Generate SQL query
outputs = model.generate(
 inputs,
 max_new_tokens=256,
 eos_token_id=tokenizer.convert_tokens_to_ids("<|endofsql|>")
)
model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

print("Generated SQL:", model_output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
 attn_output = torch.nn.functional.scaled_dot_product_attention(


Generated SQL: SQLite:
SELECT AVG(pts_home) FROM game WHERE team_name_home = 'Los Angeles Lakers';anyes
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!
!


## Save validation set to disk

In [None]:
val_dataset.save_to_disk(VAL_OUTPUT)

## Test logic for obtaining original prompt and SQLite

In [7]:
import sqlite3 as sql

# Create connection to sqlite3 database
nba_connection = sql.connect(read_path('nba-data/nba.sqlite'))
nba_cursor = nba_connection.cursor()

tennis_connection = sql.connect(read_path('tennis-data/tennis.sqlite'))
tennis_cursor = tennis_connection.cursor()

for v in val_dataset:
 if v["is_nba"]:
 prompt_length = len(nba_prompt)
 else:
 prompt_length = len(tennis_prompt)
 full_example = tokenizer.decode(v["input_ids"], skip_special_tokens=True)
 user_prompt = full_example[:prompt_length]
 question, sql_query = full_example[prompt_length:].split("SQLite:\n")
 print(question)
 print(sql_query)
 if v["is_nba"]: 
 nba_cursor.execute(sql_query)
 rows = nba_cursor.fetchall()
 else:
 tennis_cursor.execute(sql_query)
 rows = tennis_cursor.fetchall()

 for row in rows:
 print(row)
 break

How many spanish (ESP) players are there?

SELECT COUNT(*) AS spanish_players FROM players WHERE ioc = 'ESP';
(3026,)


## Run evaluation over entire validation set

In [10]:
import math
import random

def compare_result(sample_query, model_output, is_nba):
 # Clean model output to only have the query output
 if model_output[0:8] == "SQLite:\n":
 query = model_output[8:]
 elif model_output[0:8] == "SQLite: ":
 query = model_output[8:]
 elif model_output[0:7] == "SQLite:":
 query = model_output[7:]
 elif model_output[0:5] == "SQL:\n":
 query = model_output[5:]
 elif model_output[0:5] == "SQL: ":
 query = model_output[5:]
 elif model_output[0:4] == "SQL:":
 query = model_output[4:]
 else:
 query = model_output

 # Clean any excess text after the query semicolon
 for i in range(len(query)):
 if query[i] == ";":
 query = query[:i+1]
 break

 # Get sample and model result
 if is_nba:
 nba_cursor.execute(sample_query)
 sample_result = [str(item) for tup in nba_cursor.fetchall() for item in tup]
 else:
 tennis_cursor.execute(sample_query)
 sample_result = [str(item) for tup in tennis_cursor.fetchall() for item in tup]
 
 try:
 if is_nba:
 nba_cursor.execute(query)
 else:
 tennis_cursor.execute(query)
 except:
 return False, False, False
 
 if is_nba:
 model_result = [str(item) for tup in nba_cursor.fetchall() for item in tup]
 else:
 model_result = [str(item) for tup in tennis_cursor.fetchall() for item in tup]
 
 print(sample_result)
 print(model_result)

 # Strip all whitespace before comparing queries since there may be differences in spacing, newlines, tabs, etc.
 query = query.replace(" ", "").replace("\n", "").replace("\t", "")
 sample_query = sample_query.replace(" ", "").replace("\n", "").replace("\t", "")
 query_match = (query == sample_query)

 # If the queries match, the results clearly also match
 if query_match:
 return True, True, True

 # Try to execute query, if it fails, then this is a failure of the model
 try:
 for r in sample_result:
 for res in model_result:
 try:
 if math.isclose(float(r), float(res), abs_tol=0.5):
 return True, False, True
 except:
 if r in res or res in r:
 return True, False, True
 return True, False, False
 except:
 return True, False, False
 
num_valid = 0
num_sql_matched = 0
num_result_matched = 0

counter = 0

for v in val_dataset:
 # Obtain sample natural language question and sql_query
 #v = val_dataset[random.randint(0, len(val_dataset) - 1)]
 if v["is_nba"]:
 prompt_length = len(nba_prompt)
 else:
 prompt_length = len(tennis_prompt)
 full_example = tokenizer.decode(v["input_ids"], skip_special_tokens=True)
 user_prompt = full_example[:prompt_length]
 question, sql_query = full_example[prompt_length:].split("SQLite:\n")
 #print(question)
 #print(sql_query)

 # Obtain model output
 #input_text = "How many points to the Los Angeles Lakers average at home?"
 if v["is_nba"]:
 message = [{'role': 'user', 'content': nba_prompt + question}]
 else:
 message = [{'role': 'user', 'content': tennis_prompt + question}]
 
 inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors="pt").to(model.device)

 # Generate SQL query
 outputs = model.generate(
 inputs,
 max_new_tokens=256,
 eos_token_id=tokenizer.convert_tokens_to_ids("<|endofsql|>")
 )
 model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

 print(sql_query)
 print(model_output.split(";")[0])
 #print()
 #print(model_output)
 result = compare_result(sql_query, model_output, v["is_nba"])
 print("Statement valid? " + str(result[0]))
 print("SQLite matched? " + str(result[1]))
 print("Result matched? " + str(result[2]))
 print()
 print()

 if result[0]:
 num_valid += 1
 if result[1]:
 num_sql_matched += 1
 if result[2]:
 num_result_matched += 1

 counter += 1
 if counter == 105:
 break
 elif counter % 10 == 0:
 print("Finished: " + str(counter))

 print("Percent valid: " + str(num_valid / counter))
 print("Percent SQLite matched: " + str(num_sql_matched / counter))
 print("Percent result matched: " + str(num_result_matched / counter))

print("Percent valid: " + str(num_valid / counter))
print("Percent SQLite matched: " + str(num_sql_matched / counter))
print("Percent result matched: " + str(num_result_matched / counter))

# break

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) AS spanish_players FROM players WHERE ioc = 'ESP';
SQLite:
SELECT COUNT(*) FROM players WHERE ioc = 'ESP'
['3026']
['3026']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 1.0
SELECT COUNT(DISTINCT player) AS distinct_players FROM rankings;
SQLite:
SELECT COUNT(DISTINCT player) FROM rankings


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['16174']
['16174']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 1.0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM game g WHERE g.team_abbreviation_home = 'LAC' AND g.wl_home = 'L' AND g.stl_home > g.stl_away AND g.blk_home > g.blk_away AND g.season_id = '22002';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_home = 'Los Angeles Clippers' AND season_id = '22002' AND (stl_home + blk_home) > (stl_away + blk_away) AND wl_home = 'L'
['4']
['13']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 0.6666666666666666


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM game WHERE team_abbreviation_away = 'BOS' AND wl_away = 'W' AND (pts_away - pts_home) >= 20;
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_away = 'Boston Celtics' AND wl_away = 'W' AND pts_away >= 20
['179']
['1425']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 0.5
SELECT winner_name, COUNT(*) as total_wins FROM matches WHERE winner_name IS NOT NULL GROUP BY winner_name ORDER BY total_wins DESC LIMIT 1;
SQLite:
SELECT winner_name, COUNT(*) AS win_count FROM matches GROUP BY winner_name ORDER BY win_count DESC LIMIT 1


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['Roger Federer', '1305']
['None', '26399']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 0.4


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT season_id, AVG(ft_pct_home) as avg_stat FROM game WHERE team_name_home = 'Chicago Bulls' GROUP BY season_id ORDER BY avg_stat DESC LIMIT 1;
SQLite:
SELECT season_id FROM game WHERE team_name_home = 'Chicago Bulls' GROUP BY season_id ORDER BY AVG(ft_pct_home) DESC LIMIT 1
['42016', '0.89']
['42016']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 0.5


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT team FROM (SELECT team_abbreviation_home AS team, pts_home AS pts FROM game UNION ALL SELECT team_abbreviation_away, pts_away FROM game) WHERE pts < 60 GROUP BY team ORDER BY COUNT(*) DESC LIMIT 1;
SQLite:
SELECT team_name_home FROM game WHERE pts_home < 60 ORDER BY pts_home DESC LIMIT 1
['BOS']
['Providence Steamrollers']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 0.42857142857142855
SELECT p.name FROM players p JOIN rankings r ON p.player_id = r.player WHERE r.rank = 1 AND r.ranking_date > 20100101 AND r.ranking_date < 20100108;
SQLite:
SELECT name FROM players WHERE player_id IN (SELECT player FROM rankings WHERE ranking_date = 20100101 AND rank = 1)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['Roger Federer']
[]
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 1.0
Percent SQLite matched: 0.0
Percent result matched: 0.375


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(lead_changes) as total_lead_changes FROM other_stats WHERE team_abbreviation_away = 'DEN';
SQLite:
SELECT COUNT(*) FROM other_stats WHERE team_name_away = 'Denver Nuggets' AND lead_changes > 0
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8888888888888888
Percent SQLite matched: 0.0
Percent result matched: 0.3333333333333333
SELECT COUNT(DISTINCT winner_ioc) FROM matches WHERE tourney_name = 'US Open';
SQLite:
SELECT COUNT(DISTINCT winner_ioc) FROM matches WHERE tourney_name = 'US Open'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['90']
['90']
Statement valid? True
SQLite matched? True
Result matched? True


Finished: 10
Percent valid: 0.9
Percent SQLite matched: 0.1
Percent result matched: 0.4


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(CASE WHEN team_name_home = 'Miami Heat' THEN stl_home ELSE stl_away END) AS total_steals FROM game WHERE (team_name_home = 'Miami Heat' AND team_name_away = 'Boston Celtics') OR (team_name_home = 'Boston Celtics' AND team_name_away = 'Miami Heat');
SQLite:
SELECT SUM(stl_away) FROM game WHERE team_name_away = 'Miami Heat'
['1253.0']
['11520.0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.9090909090909091
Percent SQLite matched: 0.09090909090909091
Percent result matched: 0.36363636363636365
SELECT COUNT(*) FROM matches WHERE loser_name = 'Nick Kyrgios' AND tourney_name = 'Roland Garros';
SQLite:
SELECT COUNT(*) FROM matches WHERE loser_name = 'Nick Kyrgios' AND tourney_name = 'Roland Garros'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['5']
['5']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.9166666666666666
Percent SQLite matched: 0.16666666666666666
Percent result matched: 0.4166666666666667


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT team_name_home, AVG(ft_pct_home) AS avg_ft_percentage FROM game WHERE season_id = '22016' GROUP BY team_name_home ORDER BY avg_ft_percentage DESC LIMIT 1;
SQLite:
SELECT team_name_home, AVG(ft_pct_home) AS avg_ft_pct_home FROM game WHERE season_id = '22016' GROUP BY team_name_home ORDER BY avg_ft_pct_home DESC LIMIT 1
['Boston Celtics', '0.8209756097560975']
['Boston Celtics', '0.8209756097560975']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.9230769230769231
Percent SQLite matched: 0.15384615384615385
Percent result matched: 0.46153846153846156


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(os.pts_fb_home) AS avg_fast_break FROM other_stats os JOIN game g ON os.game_id = g.game_id WHERE g.team_abbreviation_home = 'PHI' AND g.season_id = '22018';
SQLite:
SELECT AVG(pts_fb_home) AS avg_fb_points FROM game WHERE team_name_home = 'Philadelphia 76ers' AND season_id = '22018'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.14285714285714285
Percent result matched: 0.42857142857142855


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(pts) AS total_points FROM ( SELECT pts_home AS pts FROM game WHERE team_abbreviation_home = 'CHI' AND season_id = '21988' UNION ALL SELECT pts_away AS pts FROM game WHERE team_abbreviation_away = 'CHI' AND season_id = '21988' );
SQLite:
SELECT SUM(pts_home + pts_away) AS total_points FROM game WHERE team_name_home = 'Chicago Bulls' AND season_id = '21988'
['8726.0']
['8591.0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8666666666666667
Percent SQLite matched: 0.13333333333333333
Percent result matched: 0.4


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT season_id, AVG(fg_pct_home) as avg_stat FROM game WHERE team_name_home = 'Milwaukee Bucks' GROUP BY season_id ORDER BY avg_stat DESC LIMIT 1;
SQLite:
SELECT season_id FROM game WHERE team_name_home = 'Milwaukee Bucks' GROUP BY season_id ORDER BY AVG(fg_pct_home) DESC LIMIT 1
['42017', '0.5326666666666666']
['42017']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.875
Percent SQLite matched: 0.125
Percent result matched: 0.4375


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MIN(height) FROM players WHERE height IS NOT NULL;
SQLite:
SELECT MIN(height) FROM players
['145.0']
['145.0']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8823529411764706
Percent SQLite matched: 0.11764705882352941
Percent result matched: 0.47058823529411764
SELECT count(*) FROM matches WHERE loser_name = 'Rafael Nadal';
SQLite:
SELECT COUNT(*) FROM matches WHERE loser_name = 'Rafael Nadal'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['255']
['255']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8888888888888888
Percent SQLite matched: 0.1111111111111111
Percent result matched: 0.5
SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND best_of = '5';
SQLite:
SELECT COUNT(*) FROM matches WHERE best_of = '5' AND tourney_name = 'US Open'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['14144']
['14144']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8947368421052632
Percent SQLite matched: 0.10526315789473684
Percent result matched: 0.5263157894736842


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT name, dob FROM players WHERE dob > 20080000;
SQLite:
SELECT * FROM players WHERE dob > 20080101
['Vito Antonio Darderi', '20080113.0']
['212625', 'U', '20080113.0', 'ITA', 'None', 'Vito Antonio Darderi']
Statement valid? True
SQLite matched? False
Result matched? True


Finished: 20
Percent valid: 0.9
Percent SQLite matched: 0.1
Percent result matched: 0.55
SELECT COUNT(*) FROM matches WHERE winner_name = 'Rafael Nadal' AND tourney_name = 'Australian Open';
SQLite:
SELECT COUNT(*) FROM matches WHERE winner_name = 'Rafael Nadal' AND tourney_name = 'Australian Open'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['77']
['77']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.9047619047619048
Percent SQLite matched: 0.14285714285714285
Percent result matched: 0.5714285714285714
SELECT COUNT(*) FROM matches WHERE loser_name = 'Taylor Fritz' AND tourney_name = 'Roland Garros';
SQLite:
SELECT COUNT(*) FROM matches WHERE loser_name = 'Taylor Fritz' AND tourney_name = 'Roland Garros'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['7']
['7']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.9090909090909091
Percent SQLite matched: 0.18181818181818182
Percent result matched: 0.5909090909090909


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT state FROM team WHERE nickname = 'Jazz';
SQLite:
SELECT state FROM team WHERE nickname = 'Jazz'
['Utah']
['Utah']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.9130434782608695
Percent SQLite matched: 0.21739130434782608
Percent result matched: 0.6086956521739131


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) as home_losses FROM game WHERE team_name_home = 'Sacramento Kings' AND wl_home = 'L' AND season_id = '21996';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_home = 'Sacramento Kings' AND wl_home = 'L' AND season_id = '21996'
['19']
['19']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.9166666666666666
Percent SQLite matched: 0.20833333333333334
Percent result matched: 0.625
SELECT winner_name FROM matches WHERE tourney_date BETWEEN 20210000 AND 20211231 ORDER BY minutes DESC LIMIT 1;
SQLite:
SELECT winner_name FROM matches WHERE tourney_name = 'Wimbledon' AND CAST(tourney_date AS INT) BETWEEN 20210000 AND 20211231


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['Jozef Kovalik']
['Kamil Majchrzak', 'Robin Haase', 'Tomas Barrios Vera', 'Enzo Couacaud', 'Mackenzie Mcdonald', 'Illya Marchenko', 'Marc Andrea Huesler', 'Maxime Cressy', 'Andrej Martin', 'Ernests Gulbis', 'Viktor Troicki', 'Brandon Nakashima', 'Francisco Cerundolo', 'Roberto Marcora', 'Alejandro Tabilo', 'Zhizhen Zhang', 'Yasutaka Uchiyama', 'Dudi Sela', 'Maxime Janvier', 'Oscar Otte', 'Denis Kudla', 'Kacper Zuk', 'Roman Safiullin', 'Federico Gaio', 'Arthur Rinderknech', 'Jack Pinnington Jones', 'Zdenek Kolar', 'Nikola Milojevic', 'Tallon Griekspoor', 'Shintaro Mochizuki', 'Matthew Ebden', 'Arthur Fery', 'Benjamin Bonzi', 'Daniel Altmaier', 'Mohamed Safwat', 'Frederico Ferreira Silva', 'Bernabe Zapata Miralles', 'Bernard Tomic', 'Anton Matusevich', 'Carlos Taberner', 'Thiago Seyboth Wild', 'Marco Trungelliti', 'Altug Celikbilek', 'Botic Van De Zandschulp', 'Damir Dzumhur', 'Alex Molcan', 'Marius Copil', 'Antoine Hoang', 'Ramkumar Ramanathan', 'Tomas Martin Etcheverry', 'Felix Gill',

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(o.pts_fb_home) AS avg_fastbreak_points FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE g.team_name_home = 'LA Clippers' AND g.season_id = '22020';
SQLite:
SELECT AVG(pts_fb_home) AS avg_fastbreak_points FROM game WHERE team_name_home = 'Los Angeles Clippers' AND season_id = '22020'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8846153846153846
Percent SQLite matched: 0.19230769230769232
Percent result matched: 0.5769230769230769


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) as games FROM other_stats os JOIN game g ON os.game_id = g.game_id WHERE g.team_name_home = 'Cleveland Cavaliers' AND os.times_tied > 8 AND g.season_id = '21996';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_home = 'Cleveland Cavaliers' AND season_id = '21996' AND times_tied > 8
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8518518518518519
Percent SQLite matched: 0.18518518518518517
Percent result matched: 0.5555555555555556


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT g.team_name_home, AVG(o.pts_2nd_chance_home) AS avg_second_chance_pts FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE g.wl_home = 'W' AND g.season_id = '22016' GROUP BY g.team_name_home ORDER BY avg_second_chance_pts DESC LIMIT 1;
SQLite:
SELECT team_name_home, AVG(pts_2nd_chance_home) AS avg_points_2nd_chance 
FROM game 
JOIN other_stats ON game.game_id = other_stats.game_id 
WHERE team_name_home IN (
 SELECT full_name FROM team WHERE year_founded BETWEEN 1979 AND 2016 
) 
AND wl_home = 'W' 
AND season_id = '22016' 
GROUP BY team_name_home 
ORDER BY avg_points_2nd_chance DESC 
LIMIT 1
['Los Angeles Lakers', '15.615384615384615']
['Charlotte Hornets', '14.68421052631579']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.17857142857142858
Percent result matched: 0.5357142857142857


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT team FROM (SELECT team_abbreviation_home AS team FROM game WHERE pts_home > 110 AND pts_away > 110 UNION ALL SELECT team_abbreviation_away FROM game WHERE pts_home > 110 AND pts_away > 110) GROUP BY team ORDER BY COUNT(*) DESC LIMIT 1;
SQLite:
SELECT team_name_home FROM game WHERE pts_home > 110 AND pts_away > 110
['LAL']
['Baltimore Bullets', 'Sheboygan Redskins', 'Syracuse Nationals', 'Boston Celtics', 'Boston Celtics', 'New York Knicks', 'Rochester Royals', 'Philadelphia Warriors', 'Syracuse Nationals', 'Ft. Wayne Zollner Pistons', 'Boston Celtics', 'Rochester Royals', 'Syracuse Nationals', 'Minneapolis Lakers', 'Boston Celtics', 'Boston Celtics', 'Boston Celtics', 'Rochester Royals', 'Philadelphia Warriors', 'Boston Celtics', 'Philadelphia Warriors', 'Boston Celtics', 'Syracuse Nationals', 'Boston Celtics', 'Philadelphia Warriors', 'Boston Celtics', 'Philadelphia Warriors', 'St. Louis Hawks', 'Minneapolis Lakers', 'Syracuse Nationals', 'New York Knicks', 'Philadelphia Warrio

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['USA', '26319.0', 'ITA', '24835.0', 'FRA', '24732.0', 'RUS', '17876.0', 'ESP', '17474.0', 'ARG', '16757.0', 'AUS', '14319.0', 'GER', '14093.0', 'SRB', '13632.0', 'GBR', '8007.0']
['ESP', '36793743.0', 'USA', '33538077.0', 'FRA', '26570241.0', 'GER', '19830850.0', 'ARG', '19049895.0', 'ITA', '15012498.0', 'RUS', '13703189.0', 'AUS', '13322279.0', 'SUI', '11696039.0', 'SRB', '11587096.0']
Statement valid? True
SQLite matched? False
Result matched? True


Finished: 30
Percent valid: 0.8666666666666667
Percent SQLite matched: 0.16666666666666666
Percent result matched: 0.5333333333333333


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM players WHERE name LIKE '%Tennis%';
SQLite: SELECT COUNT(*) FROM players WHERE name LIKE '%Tennis%'
['0']
['0']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8709677419354839
Percent SQLite matched: 0.1935483870967742
Percent result matched: 0.5483870967741935


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT game_date FROM game WHERE team_abbreviation_home = 'NYK' AND wl_home = 'W' ORDER BY game_date DESC LIMIT 1;
SQLite:
SELECT MAX(game_date) FROM game WHERE team_name_home = 'New York Knicks'
['2023-05-10 00:00:00']
['2023-05-10 00:00:00']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.875
Percent SQLite matched: 0.1875
Percent result matched: 0.5625


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(pts_fb_home) FROM other_stats WHERE game_id IN ( SELECT game_id FROM game WHERE team_name_home = 'Memphis Grizzlies' AND season_id = '22005' );
SQLite:
SELECT SUM(pts_fb_home) AS total_fast_break_points FROM game WHERE team_name_home = 'Memphis Grizzlies' AND season_id = '22005'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8484848484848485
Percent SQLite matched: 0.18181818181818182
Percent result matched: 0.5454545454545454
SELECT COUNT(*) FROM matches WHERE score LIKE '%RET%';
SQLite:
SELECT COUNT(*) FROM matches WHERE winner_age <= 18 AND loser_age <= 18


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['27685']
['2432']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8529411764705882
Percent SQLite matched: 0.17647058823529413
Percent result matched: 0.5294117647058824


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(assists) AS avg_assists FROM ( SELECT ast_home AS assists FROM game WHERE team_name_home = 'Golden State Warriors' AND wl_home = 'W' AND season_id = '22018' UNION ALL SELECT ast_away AS assists FROM game WHERE team_name_away = 'Golden State Warriors' AND wl_away = 'W' AND season_id = '22018' ) AS winning_games
SQLite:
SELECT AVG(ast_home) AS avg_assists FROM game WHERE team_name_home = 'Golden State Warriors' AND wl_home = 'W' AND season_id = '22018'
['31.0']
['31.333333333333332']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.17142857142857143
Percent result matched: 0.5428571428571428


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) as away_losses FROM game WHERE team_name_away = 'Golden State Warriors' AND wl_away = 'L' AND season_id = '21996';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_away = 'Golden State Warriors' AND wl_away = 'L' AND season_id = '21996'
['29']
['29']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8611111111111112
Percent SQLite matched: 0.16666666666666666
Percent result matched: 0.5555555555555556


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(os.pts_paint_home) as total_pts_paint FROM other_stats os JOIN game g ON os.game_id = g.game_id WHERE g.team_name_home = 'Chicago Bulls' AND g.wl_home = 'L' AND g.season_id = '21996';
SQLite:
SELECT SUM(pts_paint_home) FROM other_stats WHERE team_name_home = 'Chicago Bulls' AND season_id = '21996' AND wl_home = 'L'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8378378378378378
Percent SQLite matched: 0.16216216216216217
Percent result matched: 0.5405405405405406


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT year_founded FROM team WHERE full_name = 'Los Angeles Clippers';
SQLite:
SELECT year_founded FROM team WHERE full_name = 'Los Angeles Clippers'
['1970.0']
['1970.0']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8421052631578947
Percent SQLite matched: 0.18421052631578946
Percent result matched: 0.5526315789473685
SELECT avg(winner_age) FROM matches WHERE tourney_name = 'Australian Open';
SQLite:
SELECT AVG(winner_age) FROM matches WHERE tourney_name = 'Australian Open'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['25.690531480738247']
['25.690531480738247']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8461538461538461
Percent SQLite matched: 0.1794871794871795
Percent result matched: 0.5641025641025641


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT full_name FROM team WHERE state = 'Indiana';
SQLite:
SELECT full_name FROM team WHERE state = 'Indiana'
['Indiana Pacers']
['Indiana Pacers']
Statement valid? True
SQLite matched? True
Result matched? True


Finished: 40
Percent valid: 0.85
Percent SQLite matched: 0.2
Percent result matched: 0.575
SELECT COUNT(DISTINCT tourney_name) FROM matches WHERE winner_name = 'Pete Sampras' OR loser_name = 'Pete Sampras';
SQLite:
SELECT COUNT(DISTINCT tourney_name) FROM matches WHERE winner_name = 'Pete Sampras' OR loser_name = 'Pete Sampras'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['82']
['82']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8536585365853658
Percent SQLite matched: 0.21951219512195122
Percent result matched: 0.5853658536585366
SELECT T1.name, T1.ioc FROM players AS T1 JOIN matches AS T2 ON T1.player_id = T2.loser_id ORDER BY T2.minutes DESC LIMIT 1;
SQLite:
SELECT winner_name, winner_ioc FROM matches ORDER BY minutes DESC LIMIT 1


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['Tomas Lipovsek Puches', 'ARG']
['Federico Coria', 'ARG']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.21428571428571427
Percent result matched: 0.5952380952380952


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM game WHERE team_name_home = 'Orlando Magic' AND season_id = '22013';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_home = 'Orlando Magic' AND season_id = '22013'
['41']
['41']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8604651162790697
Percent SQLite matched: 0.23255813953488372
Percent result matched: 0.6046511627906976


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(o.pts_fb_home) AS avg_fastbreak_points FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE g.team_name_home = 'Houston Rockets' AND g.wl_home = 'W' AND (g.pts_home - g.pts_away) > 15;
SQLite:
SELECT AVG(pts_fb_home) AS avg_fastbreak_points FROM game WHERE team_name_home = 'Houston Rockets' AND wl_home = 'W' AND pts_home - pts_away > 15
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8409090909090909
Percent SQLite matched: 0.22727272727272727
Percent result matched: 0.5909090909090909


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(ft_pct_home + ft_pct_away) FROM game WHERE team_name_home = 'Los Angeles Lakers' OR team_name_away = 'Los Angeles Lakers';
SQLite:
SELECT MAX(ft_pct_home + ft_pct_away) AS highest_combined_ft_pct FROM game WHERE team_name_home = 'Los Angeles Lakers' OR team_name_away = 'Los Angeles Lakers'
['1.9569999999999999']
['1.9569999999999999']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8444444444444444
Percent SQLite matched: 0.2222222222222222
Percent result matched: 0.6


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(g.pts_home) FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE g.team_abbreviation_home = 'LAC' AND o.team_turnovers_home > o.team_turnovers_away AND o.total_turnovers_home < o.total_turnovers_away AND g.season_id = '22014';
SQLite:
SELECT COUNT(*) FROM game 
WHERE team_name_home = 'Los Angeles Clippers' 
AND team_turnovers_home > other_stats.team_turnovers_away 
AND total_turnovers_home < total_turnovers_away
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8260869565217391
Percent SQLite matched: 0.21739130434782608
Percent result matched: 0.5869565217391305


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT team_abbreviation_away FROM other_stats ORDER BY pts_off_to_away DESC LIMIT 1;
SQLite:
SELECT team_abbreviation_away, SUM(pts_off_to_away) AS total_pts_off_turnovers FROM other_stats GROUP BY team_abbreviation_away ORDER BY total_pts_off_turnovers DESC LIMIT 1
['ATL']
['ATL', '14423']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8297872340425532
Percent SQLite matched: 0.2127659574468085
Percent result matched: 0.5957446808510638


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(reb_home) FROM game WHERE team_abbreviation_home = 'SAS' AND season_id = '22015';
SQLite:
SELECT SUM(reb_home) AS total_rebounds FROM game WHERE team_name_home = 'San Antonio Spurs' AND season_id = '22015'
['1845.0']
['1845.0']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8333333333333334
Percent SQLite matched: 0.20833333333333334
Percent result matched: 0.6041666666666666
SELECT AVG(winner_age) FROM matches WHERE tourney_name = 'Wimbledon' AND winner_name = 'Roger Federer';
SQLite:
SELECT AVG(winner_age) AS avg_age FROM matches WHERE tourney_name = 'Wimbledon' AND winner_name = 'Roger Federer'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['29.316037735849058']
['29.316037735849058']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8367346938775511
Percent SQLite matched: 0.20408163265306123
Percent result matched: 0.6122448979591837
SELECT COUNT(*) FROM matches WHERE winner_age < 25;
SQLite:
SELECT COUNT(*) FROM matches WHERE winner_age < 25


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['573136']
['573136']
Statement valid? True
SQLite matched? True
Result matched? True


Finished: 50
Percent valid: 0.84
Percent SQLite matched: 0.22
Percent result matched: 0.62


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(pts_away) FROM game WHERE team_abbreviation_away = 'PHX';
SQLite:
SELECT pts_away FROM game ORDER BY pts_away DESC LIMIT 1
['161.0']
['196.0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8431372549019608
Percent SQLite matched: 0.21568627450980393
Percent result matched: 0.6078431372549019
SELECT AVG(loser_age) FROM matches WHERE tourney_name = 'Wimbledon' AND best_of = '5';
SQLite:
SELECT AVG(loser_age) FROM matches WHERE tourney_name = 'Wimbledon' AND best_of = '5'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['26.897281943732857']
['26.897281943732857']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8461538461538461
Percent SQLite matched: 0.23076923076923078
Percent result matched: 0.6153846153846154


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT pts_home + pts_away FROM game WHERE fgm_home = 33 LIMIT 1;
SQLite:
SELECT pts_home FROM game WHERE fgm_home = 33
['144.0']
['76.0', '81.0', '77.0', '78.0', '77.0', '81.0', '75.0', '83.0', '79.0', '83.0', '78.0', '91.0', '78.0', '75.0', '89.0', '89.0', '95.0', '81.0', '82.0', '105.0', '88.0', '83.0', '92.0', '79.0', '84.0', '89.0', '81.0', '86.0', '96.0', '89.0', '96.0', '89.0', '83.0', '88.0', '89.0', '99.0', '88.0', '94.0', '104.0', '93.0', '103.0', '106.0', '95.0', '120.0', '125.0', '85.0', '93.0', '92.0', '93.0', '89.0', '79.0', '100.0', '88.0', '80.0', '89.0', '87.0', '84.0', '95.0', '86.0', '88.0', '96.0', '89.0', '93.0', '89.0', '80.0', '85.0', '91.0', '98.0', '88.0', '83.0', '93.0', '93.0', '91.0', '89.0', '90.0', '85.0', '79.0', '95.0', '79.0', '97.0', '83.0', '110.0', '100.0', '82.0', '73.0', '81.0', '86.0', '97.0', '89.0', '86.0', '82.0', '83.0', '86.0', '78.0', '94.0', '100.0', '100.0', '84.0', '90.0', '98.0', '97.0', '86.0', '100.0', '92.0', '93.0', '85.0', '92.0', '

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['249.0']
['249.0']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8518518518518519
Percent SQLite matched: 0.2222222222222222
Percent result matched: 0.6111111111111112


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(pts_away) FROM game WHERE team_name_away = 'Miami Heat';
SQLite:
SELECT AVG(pts_away) FROM game WHERE team_name_away = 'Miami Heat'
['96.7824377457405']
['96.7824377457405']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8545454545454545
Percent SQLite matched: 0.23636363636363636
Percent result matched: 0.6181818181818182


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT game_date, ABS(pts_home - pts_away) AS margin FROM game ORDER BY margin DESC LIMIT 1;
SQLite:
SELECT MAX(CASE WHEN wl_home = 'W' THEN pts_home - pts_away ELSE pts_away - pts_home END) AS largest_victory_margin FROM game
['2021-12-02 00:00:00', '73.0']
['73.0']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.23214285714285715
Percent result matched: 0.625
SELECT COUNT(*) FROM matches WHERE winner_name = 'Carlos Alcaraz' AND tourney_name = 'Wimbledon';
SQLite:
SELECT COUNT(*) FROM matches WHERE winner_name = 'Carlos Alcaraz' AND tourney_name = 'Wimbledon'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['11']
['11']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8596491228070176
Percent SQLite matched: 0.24561403508771928
Percent result matched: 0.631578947368421


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM players WHERE height > (SELECT AVG(height) FROM players);
SQLite:
SELECT COUNT(*) FROM players WHERE height > (SELECT AVG(height) FROM players)
['1366']
['1366']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8620689655172413
Percent SQLite matched: 0.25862068965517243
Percent result matched: 0.6379310344827587
SELECT COUNT(*) FROM matches WHERE winner_ioc = 'ITA' AND loser_ioc = 'ESP' AND tourney_date BETWEEN 20230000 AND 20231231;
SQLite:
SELECT COUNT(*) FROM matches WHERE winner_ioc = 'ITA' AND loser_ioc = 'ESP' AND tourney_date BETWEEN 20230101 AND 20231231


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['117']
['117']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.864406779661017
Percent SQLite matched: 0.2542372881355932
Percent result matched: 0.6440677966101694


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(g.pts_home) as max_points FROM game g JOIN other_stats os ON g.game_id = os.game_id WHERE g.team_name_home = 'Miami Heat' AND os.pts_2nd_chance_home > 10;
SQLite:
SELECT MAX(pts_home) FROM game WHERE team_name_home = 'Miami Heat' AND pts_2nd_chance_home > 10
Statement valid? False
SQLite matched? False
Result matched? False


Finished: 60
Percent valid: 0.85
Percent SQLite matched: 0.25
Percent result matched: 0.6333333333333333


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(pts_fb_home) as max_fb_points FROM other_stats WHERE team_abbreviation_home = 'HOU';
SQLite:
SELECT MAX(pts_fb_home) FROM other_stats WHERE team_name_home = 'Houston Rockets'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8360655737704918
Percent SQLite matched: 0.2459016393442623
Percent result matched: 0.6229508196721312


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(pts_away) FROM game WHERE team_name_away = 'Portland Trail Blazers';
SQLite:
SELECT AVG(pts_away) FROM game WHERE team_name_away = 'Portland Trail Blazers'
['102.6668215613383']
['102.6668215613383']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8387096774193549
Percent SQLite matched: 0.25806451612903225
Percent result matched: 0.6290322580645161


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT hand FROM players WHERE name = 'Andre Agassi';
SQLite:
SELECT hand FROM players WHERE name = 'Andre Agassi'
['R']
['R']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8412698412698413
Percent SQLite matched: 0.2698412698412698
Percent result matched: 0.6349206349206349


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT pts_home + pts_away FROM game WHERE pf_home = pf_away ORDER BY game_date DESC LIMIT 1;
SQLite:
SELECT COUNT(*) FROM game WHERE pf_home = pf_away
['258.0']
['4726']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.84375
Percent SQLite matched: 0.265625
Percent result matched: 0.625


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(pts_paint_away) as total_pts_paint FROM other_stats WHERE team_abbreviation_away = 'MIL';
SQLite:
SELECT SUM(pts_paint_away) FROM other_stats WHERE team_name_away = 'Milwaukee Bucks'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8307692307692308
Percent SQLite matched: 0.26153846153846155
Percent result matched: 0.6153846153846154


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(os.pts_paint_away) as avg_pts_paint FROM other_stats os JOIN game g ON os.game_id = g.game_id WHERE g.team_name_away = 'Utah Jazz' AND g.wl_away = 'W';
SQLite:
SELECT AVG(pts_paint_away) FROM other_stats WHERE team_name_away = 'Utah Jazz' AND wl_away = 'W'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8181818181818182
Percent SQLite matched: 0.25757575757575757
Percent result matched: 0.6060606060606061


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(fg_pct_away) FROM game WHERE team_name_away = 'Los Angeles Lakers';
SQLite:
SELECT AVG(fg_pct_away) FROM game WHERE team_name_away = 'Los Angeles Lakers'
['0.46789967284623774']
['0.46789967284623774']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8208955223880597
Percent SQLite matched: 0.26865671641791045
Percent result matched: 0.6119402985074627
SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Andre Agassi' AND tourney_name = 'US Open' GROUP BY winner_name ORDER BY wins DESC LIMIT 1;
SQLite:
SELECT winner_name, COUNT(*) AS defeats FROM matches WHERE loser_name = 'Andre Agassi' AND tourney_name = 'US Open' GROUP BY winner_name ORDER BY defeats DESC LIMIT 1


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['Pete Sampras', '4']
['Pete Sampras', '4']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8235294117647058
Percent SQLite matched: 0.2647058823529412
Percent result matched: 0.6176470588235294


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(fg3m_home + fg3m_away) AS total_three_pointers FROM game WHERE season_id = '22016' AND (team_name_home = 'Golden State Warriors' OR team_name_away = 'Golden State Warriors');
SQLite:
SELECT SUM(fg3m_home) AS total_three_pointers FROM game WHERE team_name_home = 'Golden State Warriors' AND season_id = '22016'
['1719.0']
['523.0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8260869565217391
Percent SQLite matched: 0.2608695652173913
Percent result matched: 0.6086956521739131


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT full_name FROM team WHERE city = 'Dallas';
SQLite:
SELECT full_name FROM team WHERE city = 'Dallas'
['Dallas Mavericks']
['Dallas Mavericks']
Statement valid? True
SQLite matched? True
Result matched? True


Finished: 70
Percent valid: 0.8285714285714286
Percent SQLite matched: 0.2714285714285714
Percent result matched: 0.6142857142857143
SELECT AVG(Points) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Pete Sampras';
SQLite: SELECT AVG(points) AS avg_points FROM rankings WHERE player IN (SELECT player_id FROM players WHERE name = 'Pete Sampras')


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['3256.267884322679']
['3256.267884322679']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8309859154929577
Percent SQLite matched: 0.2676056338028169
Percent result matched: 0.6197183098591549


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM players WHERE dob = (SELECT dob FROM players WHERE name = 'Rafael Nadal') AND name != 'Rafael Nadal';
SQLite:
SELECT COUNT(*) FROM players WHERE dob IN (SELECT dob FROM players WHERE name = 'Rafael Nadal')
['4']
['5']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8333333333333334
Percent SQLite matched: 0.2638888888888889
Percent result matched: 0.6111111111111112
SELECT COUNT(*) FROM matches WHERE winner_name = 'Carlos Alcaraz' AND loser_name = 'Novak Djokovic';
SQLite:
SELECT COUNT(*) FROM matches WHERE winner_name = 'Carlos Alcaraz' AND loser_name = 'Novak Djokovic'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['2']
['2']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8356164383561644
Percent SQLite matched: 0.273972602739726
Percent result matched: 0.6164383561643836


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) as away_games FROM game WHERE team_name_away = 'Los Angeles Lakers' AND season_id = '21996';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_away = 'Los Angeles Lakers' AND season_id = '21996'
['41']
['41']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8378378378378378
Percent SQLite matched: 0.2702702702702703
Percent result matched: 0.6216216216216216


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT game_id, (pts_home + pts_away) AS total_points FROM game WHERE team_abbreviation_home = 'LAL' ORDER BY total_points DESC LIMIT 1;
SQLite:
SELECT game_id, team_name_home, team_name_away, pts_home, pts_away FROM game WHERE team_name_home = 'Los Angeles Lakers' AND team_name_away = 'Los Angeles Lakers' ORDER BY pts_home + pts_away DESC LIMIT 1
['0028000933', '294.0']
[]
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.84
Percent SQLite matched: 0.26666666666666666
Percent result matched: 0.6133333333333333


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(stl) AS max_steals FROM ( SELECT stl_home AS stl FROM game WHERE team_abbreviation_home = 'DET' AND season_id = '22004' UNION ALL SELECT stl_away AS stl FROM game WHERE team_abbreviation_away = 'DET' AND season_id = '22004' );
SQLite:
SELECT MAX(stl_home) FROM game WHERE team_name_home = 'Detroit Pistons' AND season_id = '22004'
['13.0']
['12.0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8421052631578947
Percent SQLite matched: 0.2631578947368421
Percent result matched: 0.6052631578947368
SELECT AVG(loser_age) FROM matches WHERE loser_name = 'Alexander Zverev';
SQLite:
SELECT AVG(loser_age) FROM matches WHERE loser_name = 'Alexander Zverev'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['20.951063829787234']
['20.951063829787234']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8441558441558441
Percent SQLite matched: 0.2727272727272727
Percent result matched: 0.6103896103896104


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MIN(total_points) AS lowest_scoring_game FROM ( SELECT (pts_home + pts_away) AS total_points FROM game WHERE season_id = '21994' AND (team_abbreviation_home = 'IND' OR team_abbreviation_away = 'IND') );
SQLite:
SELECT game_id, pts_home FROM game WHERE team_name_home = 'Indiana Pacers' AND season_id = '1994' ORDER BY pts_home ASC LIMIT 1
['155.0']
[]
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8461538461538461
Percent SQLite matched: 0.2692307692307692
Percent result matched: 0.6025641025641025
SELECT MIN(minutes) FROM matches WHERE winner_name = 'Novak Djokovic' OR loser_name = 'Novak Djokovic';
SQLite:
SELECT MIN(minutes) FROM matches WHERE winner_name = 'Novak Djokovic' OR loser_name = 'Novak Djokovic'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['0.0']
['0.0']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8481012658227848
Percent SQLite matched: 0.27848101265822783
Percent result matched: 0.6075949367088608


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT dob FROM players WHERE name = 'Andy Murray';
SQLite:
SELECT dob FROM players WHERE name = 'Andy Murray'
['19870515.0']
['19870515.0']
Statement valid? True
SQLite matched? True
Result matched? True


Finished: 80
Percent valid: 0.85
Percent SQLite matched: 0.2875
Percent result matched: 0.6125


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(tov_away) FROM game WHERE team_name_away = 'Los Angeles Lakers';
SQLite:
SELECT AVG(tov_away) FROM game WHERE team_name_home = 'Los Angeles Lakers'
['14.554896142433234']
['14.996632996632997']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8518518518518519
Percent SQLite matched: 0.2839506172839506
Percent result matched: 0.6172839506172839


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(o.pts_paint_away) FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE g.team_abbreviation_home = 'PHI' AND g.season_id = '22020' AND o.lead_changes > 15;
SQLite:
SELECT AVG(other_stats.pts_paint_home) FROM other_stats JOIN game ON other_stats.game_id = game.game_id WHERE game.team_name_home = 'Philadelphia 76ers' AND game.season_id = '22020' AND other_stats.lead_changes > 15
['50.0']
['42.0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8536585365853658
Percent SQLite matched: 0.2804878048780488
Percent result matched: 0.6097560975609756


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT team_abbreviation_away, reb_away, game_date FROM game ORDER BY reb_away DESC LIMIT 1;
SQLite:
SELECT MAX(reb_away) FROM game
['BOS', '90.0', '1957-10-22 00:00:00']
['90.0']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8554216867469879
Percent SQLite matched: 0.27710843373493976
Percent result matched: 0.6144578313253012


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(fg3_pct_away) FROM game WHERE team_abbreviation_away = 'PHX';
SQLite:
SELECT MAX(fg3_pct_away) FROM game WHERE team_name_away = 'Phoenix Suns'
['1.0']
['1.0']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.27380952380952384
Percent result matched: 0.6190476190476191


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(fg3m_home) FROM game WHERE team_name_home = 'Brooklyn Nets';
SQLite:
SELECT MAX(fg3m_home) FROM game WHERE team_name_home = 'Brooklyn Nets'
['22.0']
['22.0']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8588235294117647
Percent SQLite matched: 0.2823529411764706
Percent result matched: 0.6235294117647059
SELECT COUNT(*) FROM matches m JOIN rankings r ON m.loser_id = r.player AND m.tourney_date = r.ranking_date WHERE m.winner_name = 'Alexander Zverev' AND r.rank <= 10;
SQLite:
SELECT COUNT(*) FROM matches WHERE winner_name = 'Alexander Zverev' AND loser_rank <= 10


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['47']
['50']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8604651162790697
Percent SQLite matched: 0.27906976744186046
Percent result matched: 0.6162790697674418


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(os.pts_paint_home) FROM other_stats os JOIN game g ON os.game_id = g.game_id WHERE g.team_name_home = 'Boston Celtics' AND g.plus_minus_home >= 10;
SQLite:
SELECT AVG(pts_paint_home) AS avg_paint FROM game WHERE team_name_home = 'Boston Celtics' AND wl_home = 'W' AND pts_home - pts_away >= 10
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8505747126436781
Percent SQLite matched: 0.27586206896551724
Percent result matched: 0.6091954022988506
SELECT COUNT(DISTINCT tourney_name) FROM matches WHERE tourney_date BETWEEN 20190000 AND 20191231;
SQLite:
SELECT COUNT(DISTINCT tourney_name) FROM matches WHERE CAST(tourney_date AS INT) >= 20190101 AND CAST(tourney_date AS INT) <= 20191231


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['591']
['591']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8522727272727273
Percent SQLite matched: 0.2727272727272727
Percent result matched: 0.6136363636363636


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM game WHERE team_abbreviation_home = 'CHI' AND wl_home = 'W' AND season_id = '22010';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_home = 'Chicago Bulls' AND wl_home = 'W' AND season_id = '22010'
['36']
['36']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8539325842696629
Percent SQLite matched: 0.2696629213483146
Percent result matched: 0.6179775280898876


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(victory_margin) AS avg_victory_margin FROM ( SELECT plus_minus_home AS victory_margin FROM game WHERE team_name_home = 'Miami Heat' AND wl_home = 'W' AND season_id = '22013' UNION ALL SELECT plus_minus_away AS victory_margin FROM game WHERE team_name_away = 'Miami Heat' AND wl_away = 'W' AND season_id = '22013' ) AS victories
SQLite:
SELECT AVG(CASE WHEN wl_home = 'W' THEN pts_home - pts_away ELSE pts_away - pts_home END) AS avg_victory_margin FROM game WHERE team_name_home = 'Miami Heat' AND season_id = '22013'
['11.481481481481481']
['9.878048780487806']
Statement valid? True
SQLite matched? False
Result matched? False


Finished: 90
Percent valid: 0.8555555555555555
Percent SQLite matched: 0.26666666666666666
Percent result matched: 0.6111111111111112
SELECT COUNT(*) FROM matches WHERE tourney_name = 'Wimbledon' AND (winner_name = 'Pete Sampras' OR loser_name = 'Pete Sampras');
SQLite:
SELECT COUNT(*) FROM matches WHERE tourney_name = 'Wimbledon'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['70']
['16799']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.26373626373626374
Percent result matched: 0.6043956043956044


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(plus_minus_home) as max_plus_minus FROM game WHERE team_name_home = 'Indiana Pacers';
SQLite:
SELECT MAX(plus_minus_home) FROM game WHERE team_name_home = 'Indiana Pacers'
['65']
['65']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8586956521739131
Percent SQLite matched: 0.2608695652173913
Percent result matched: 0.6086956521739131


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT * FROM game WHERE season_id = '22015' AND ((team_abbreviation_home = 'HOU' AND team_abbreviation_away = 'DAL') OR (team_abbreviation_home = 'DAL' AND team_abbreviation_away = 'HOU'));
SQLite:
SELECT * FROM game WHERE (team_name_home = 'Houston Rockets' AND team_name_away = 'Dallas Mavericks') OR (team_name_away = 'Houston Rockets' AND team_name_home = 'Dallas Mavericks') AND season_id = '22015'
['22015', '1610612745', 'HOU', 'Houston Rockets', '0021500140', '2015-11-14 00:00:00', 'HOU vs. DAL', 'L', '240', '32.0', '84.0', '0.381', '9.0', '34.0', '0.265', '25.0', '32.0', '0.781', '12.0', '31.0', '43.0', '22.0', '9.0', '5.0', '14.0', '23.0', '98.0', '-12', '1', '1610612742', 'DAL', 'Dallas Mavericks', 'DAL @ HOU', 'W', '43.0', '89.0', '0.483', '8.0', '28.0', '0.286', '16.0', '21.0', '0.762', '8.0', '37.0', '45.0', '24.0', '6.0', '7.0', '11.0', '21.0', '110.0', '12', '1', 'Regular Season', '22015', '1610612742', 'DAL', 'Dallas Mavericks', '0021500287', '2015-12-04 00:00:00', 'DAL v

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT AVG(fg3m_home) FROM game WHERE team_abbreviation_home = 'GSW' AND season_id = '22018';
SQLite:
SELECT AVG(fg3m_home) AS avg_fg3m_home FROM game WHERE team_name_home = 'Golden State Warriors' AND season_id = '22018'
['13.195121951219512']
['13.195121951219512']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8617021276595744
Percent SQLite matched: 0.2553191489361702
Percent result matched: 0.6170212765957447
SELECT p.name, MAX(r.rank) as old_rank, MIN(r.rank) as new_rank, (MAX(r.rank) - MIN(r.rank)) as improvement FROM rankings r JOIN players p ON r.player = p.player_id GROUP BY p.player_id, p.name HAVING (MAX(r.rank) - MIN(r.rank)) > 2000 ORDER BY improvement DESC;
SQLite:
SELECT player_id, name, points FROM rankings WHERE points > 2000 ORDER BY points DESC


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8526315789473684
Percent SQLite matched: 0.25263157894736843
Percent result matched: 0.6105263157894737


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) AS high_scoring_first_quarters FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE (g.team_name_home = 'Oklahoma City Thunder' AND g.pts_home / 4 > 30) OR (g.team_name_away = 'Oklahoma City Thunder' AND g.pts_away / 4 > 30) AND g.season_id = '22017';
SQLite:
SELECT COUNT(*) FROM game WHERE team_name_home = 'Oklahoma City Thunder' AND game_date BETWEEN '2017-01-01' AND '2017-03-31' AND (pts_home - pts_away) > 30
['83']
['0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8541666666666666
Percent SQLite matched: 0.25
Percent result matched: 0.6041666666666666


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT ioc, COUNT(*) as player_count FROM players GROUP BY ioc HAVING COUNT(*) > 1000 ORDER BY player_count DESC;
SQLite:
SELECT ioc, COUNT(*) AS player_count FROM players GROUP BY ioc HAVING COUNT(*) > 1000
['USA', '13102', 'AUS', '3266', 'GBR', '3200', 'ESP', '3026', 'GER', '2675', 'ITA', '2656', 'FRA', '2582', 'BRA', '2092', 'ARG', '1759', 'MEX', '1323', 'JPN', '1305', 'RUS', '1093', 'IND', '1078', 'RSA', '1040']
['ARG', '1759', 'AUS', '3266', 'BRA', '2092', 'ESP', '3026', 'FRA', '2582', 'GBR', '3200', 'GER', '2675', 'IND', '1078', 'ITA', '2656', 'JPN', '1305', 'MEX', '1323', 'RSA', '1040', 'RUS', '1093', 'USA', '13102']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8556701030927835
Percent SQLite matched: 0.24742268041237114
Percent result matched: 0.6082474226804123


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT MAX(pts_home) FROM game WHERE team_abbreviation_home = 'GSW';
SQLite:
SELECT MAX(pts_home) FROM game WHERE team_name_home = 'Golden State Warriors'
['149.0']
['155.0']
Statement valid? True
SQLite matched? False
Result matched? False


Percent valid: 0.8571428571428571
Percent SQLite matched: 0.24489795918367346
Percent result matched: 0.6020408163265306


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT COUNT(*) FROM players WHERE hand = 'R';
SQLite:
SELECT COUNT(*) FROM players WHERE hand = 'R'
['15666']
['15666']
Statement valid? True
SQLite matched? True
Result matched? True


Percent valid: 0.8585858585858586
Percent SQLite matched: 0.25252525252525254
Percent result matched: 0.6060606060606061


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT o.pts_2nd_chance_home - o.pts_2nd_chance_away AS second_chance_diff FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE g.team_name_home = 'Chicago Bulls' AND g.season_id = '22016' ORDER BY ABS(g.pts_home - g.pts_away) ASC LIMIT 1;
SQLite:
SELECT ABS(pts_2nd_chance_home - pts_2nd_chance_away) AS second_chance_difference FROM game WHERE team_name_home = 'Chicago Bulls' AND season_id = '22016'
Statement valid? False
SQLite matched? False
Result matched? False


Finished: 100
Percent valid: 0.85
Percent SQLite matched: 0.25
Percent result matched: 0.6


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT SUM(pts_2nd_chance_home) as total_2nd_chance FROM other_stats WHERE team_abbreviation_home = 'MIA';
SQLite:
SELECT SUM(pts_2nd_chance_home) FROM game WHERE team_name_home = 'Miami Heat'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8415841584158416
Percent SQLite matched: 0.24752475247524752
Percent result matched: 0.594059405940594


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


SELECT pts_home FROM game WHERE game_id = (SELECT game_id FROM other_stats ORDER BY (pts_2nd_chance_home + pts_2nd_chance_away) DESC LIMIT 1);
SQLite:
SELECT pts_home FROM game WHERE pts_2nd_chance_home > 0 ORDER BY pts_2nd_chance_home DESC LIMIT 1
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8333333333333334
Percent SQLite matched: 0.24509803921568626
Percent result matched: 0.5882352941176471
SELECT COUNT(*) AS top5_count FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Pete Sampras' AND r.rank <= 5;
SQLite:
SELECT COUNT(*) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Pete Sampras' AND r.rank <= 5


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['509']
['509']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8349514563106796
Percent SQLite matched: 0.24271844660194175
Percent result matched: 0.5922330097087378
SELECT DISTINCT p.name FROM rankings r JOIN players p ON r.player = p.player_id WHERE r.rank = 1 AND r.ranking_date BETWEEN 20230000 AND 20231231;
SQLite:
SELECT DISTINCT players.name 
FROM players 
JOIN rankings ON players.player_id = rankings.player 
WHERE rankings.rank = 1 AND rankings.ranking_date BETWEEN 20230101 AND 20231231


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.


['Carlos Alcaraz', 'Novak Djokovic']
['Carlos Alcaraz', 'Novak Djokovic']
Statement valid? True
SQLite matched? False
Result matched? True


Percent valid: 0.8365384615384616
Percent SQLite matched: 0.2403846153846154
Percent result matched: 0.5961538461538461
SELECT SUM(tov) AS total_turnovers FROM ( SELECT tov_home AS tov FROM game WHERE team_abbreviation_home = 'SAC' AND season_id = '22001' UNION ALL SELECT tov_away AS tov FROM game WHERE team_abbreviation_away = 'SAC' AND season_id = '22001' );
SQLite:
SELECT SUM(total_turnovers_home) FROM other_stats WHERE team_name_home = 'Sacramento Kings' AND season_id = '22001'
Statement valid? False
SQLite matched? False
Result matched? False


Percent valid: 0.8285714285714286
Percent SQLite matched: 0.23809523809523808
Percent result matched: 0.5904761904761905


# Test Tennis Inference

In [None]:
input_text = "Which hand does Pete Sampras use?"
message = [{'role': 'user', 'content': tennis_prompt + input_text}]
inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors="pt").to(model.device)

# Generate SQL query
outputs = model.generate(
 inputs,
 max_new_tokens=256,
 eos_token_id=tokenizer.convert_tokens_to_ids("<|endofsql|>")
)
model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

print("Generated SQL:", model_output)

## Test validation set only on short queries

In [None]:
num_valid = 0
num_sql_matched = 0
num_result_matched = 0
counter = 0
for v in val_dataset:
 # Obtain sample natural language question and sql_query
 #v = val_dataset[random.randint(0, len(val_dataset) - 1)]
 if v["is_nba"]:
 prompt_length = len(nba_prompt)
 else:
 prompt_length = len(tennis_prompt)
 full_example = tokenizer.decode(v["input_ids"], skip_special_tokens=True)
 user_prompt = full_example[:prompt_length]
 question, sql_query = full_example[prompt_length:].split("SQLite:\n")
 #print(question)
 #print(sql_query)

 if len(sql_query) <= 90:
 # Obtain model output
 if v["is_nba"]: 
 message = [{'role': 'user', 'content': nba_prompt + question}]
 else:
 message = [{'role': 'user', 'content': tennis_prompt + question}]
 
 inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors="pt").to(model.device)

 # Generate SQL query
 outputs = model.generate(
 inputs,
 max_new_tokens=256,
 eos_token_id=tokenizer.convert_tokens_to_ids("<|endofsql|>")
 )
 model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

 print(sql_query)
 print(model_output.split(";")[0])
 #print()
 #print(model_output)
 result = compare_result(sql_query, model_output, v["is_nba"])
 print("Statement valid? " + str(result[0]))
 print("SQLite matched? " + str(result[1]))
 print("Result matched? " + str(result[2]))
 print()
 print()
 counter += 1

 if result[0]:
 num_valid += 1
 if result[1]:
 num_sql_matched += 1
 if result[2]:
 num_result_matched += 1

print("Percent valid: " + str(num_valid / counter))
print("Percent SQLite matched: " + str(num_sql_matched / counter))
print("Percent result matched: " + str(num_result_matched / counter))

# Test privacy breaking inference

In [None]:
# See if we can generate NBA SQL with the tennis prompt
input_text = "What is the abbreviation of the team nicknamed 'Heat'?"
message = [{'role': 'user', 'content': tennis_prompt + input_text}]
inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors="pt").to(model.device)

# Generate SQL query
outputs = model.generate(
 inputs,
 max_new_tokens=256,
 eos_token_id=tokenizer.convert_tokens_to_ids("<|endofsql|>")
)
model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

print("Generated SQL:", model_output)