|
--- |
|
license: apache-2.0 |
|
datasets: |
|
- b-mc2/sql-create-context |
|
language: |
|
- en |
|
library_name: transformers |
|
--- |
|
# Generate SQL from text - Squeal |
|
|
|
|
|
Please use the code below as an example of how to use this model. |
|
|
|
|
|
|
|
```python |
|
import torch |
|
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig |
|
|
|
def load_model(model_name):
    """Load a causal LM and its tokenizer with 4-bit (QLoRA-style) quantization.

    Args:
        model_name: Hugging Face hub id or local path of the model.

    Returns:
        A ``(model, tokenizer)`` tuple ready for text generation.
    """
    # Weights are stored as 4-bit NF4; matmuls are computed in fp16.
    # (torch.float16 directly — getattr(torch, 'float16') was an
    # unnecessary indirection.)
    compute_dtype = torch.float16

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=False,
    )

    # device_map={"": 0} places the entire model on GPU 0.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map={"": 0},
        quantization_config=bnb_config,
    )

    # Tokenizer: reuse EOS as the padding token; right-padding is the
    # convention for causal-LM generation.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    return model, tokenizer
|
|
|
# Fetch the quantized model and its tokenizer from the Hugging Face hub.
model, tokenizer = load_model('vagmi/squeal')

# Instruction prompt: the table schemas followed by a natural-language question.
prompt = "<s>[INST] Output SQL for the given table structure \n \
CREATE TABLE votes (contestant_number VARCHAR, num_votes int); \
CREATE TABLE contestants (contestant_number VARCHAR, contestant_name VARCHAR); \
What is the contestant number and name of the contestant who got least votes?[/INST]"

# Wrap the model in a text-generation pipeline and run the prompt through it.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
    device_map='auto',
)
result = pipe(prompt)

# The pipeline echoes the prompt; slice it off (and the final character)
# so only the generated SQL is printed.
print(result[0]['generated_text'][len(prompt):-1])
|
``` |
|
|
|
## How I built it |
|
|
|
Watch me build this model. |
|
|
|
https://www.youtube.com/watch?v=PNFhAfxR_d8 |
|
|
|
Here is the notebook I used to train this model. |
|
|
|
https://colab.research.google.com/drive/1jYX8AlRMTY7F_dH3hCFM4ljg5qEmCoUe#scrollTo=IUILKaGWhBxS |
|
|