import os
import base64
import json
import platform
import re
from threading import Event, Thread

import gradio as gr
import requests
import sqlparse
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)

import firebase_admin
from firebase_admin import credentials, firestore

print(f"Running on {platform.system()}")

# On local development machines (Windows/macOS), load environment variables
# from a .env file; on the deployment host they are expected to be set already.
if platform.system() in ("Windows", "Darwin"):
    from dotenv import load_dotenv
    load_dotenv()

# Model and dataset identifiers referenced in the UI links below.
quantized_model = "richardr1126/spider-skeleton-wizard-coder-8bit"
merged_model = "richardr1126/spider-skeleton-wizard-coder-merged"
initial_model = "WizardLM/WizardCoder-15B-V1.0"
lora_model = "richardr1126/spider-skeleton-wizard-coder-qlora"
dataset = "richardr1126/spider-skeleton-context-instruct"
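
# The FIREBASE environment variable is expected to hold the Firestore
# service-account JSON, base64-encoded so it can be stored as a single
# secret string.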
base64_string = os.getenv('FIREBASE')
base64_bytes = base64_string.encode('utf-8')
json_bytes = base64.b64decode(base64_bytes)
json_data = json_bytes.decode('utf-8')

firebase_auth = json.loads(json_data)

cred = credentials.Certificate(firebase_auth)
firebase_admin.initialize_app(cred)
db = firestore.client()

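
# Firestore logging helpers: raw generations go to the 'logs' collection,
# user feedback to 'ratings'.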
def log_message_to_firestore(input_message, db_info, temperature, response_text):
    """Write one generation (inputs and output) to the 'logs' collection."""
    doc_ref = db.collection('logs').document()
    log_data = {
        'timestamp': firestore.SERVER_TIMESTAMP,
        'temperature': temperature,
        'db_info': db_info,
        'input': input_message,
        'output': response_text,
    }
    doc_ref.set(log_data)


# Outputs that have already been rated, so the same response can't be rated twice.
rated_outputs = set()


def log_rating_to_firestore(input_message, db_info, temperature, response_text, rating):
    # Identify an output by its full context so duplicate ratings can be blocked.
    output_id = f"{input_message} {db_info} {response_text} {temperature}"

    if output_id in rated_outputs:
        gr.Warning("You've already rated this output!")
        return
    if not input_message or not response_text or not rating:
        gr.Info("You haven't asked a question yet!")
        return

    rated_outputs.add(output_id)

    doc_ref = db.collection('ratings').document()
    log_data = {
        'timestamp': firestore.SERVER_TIMESTAMP,
        'temperature': temperature,
        'db_info': db_info,
        'input': input_message,
        'output': response_text,
        'rating': rating,
    }
    doc_ref.set(log_data)
    gr.Info("Thanks for your feedback!")


def format_query(text):
    """Strip the query skeleton from the model output and pretty-print the SQL."""
    # The model emits "<skeleton> | <query>"; keep only the part after the "|".
    try:
        final_query = text.split("|")[1].strip()
    except Exception:
        final_query = text

    try:
        # Reindent the query and upper-case SQL keywords for readability.
        formatted_query = sqlparse.format(final_query, reindent=True, keyword_case='upper')
    except Exception:
        formatted_query = final_query

    return formatted_query

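# For example, a hypothetical raw response such as
#   "select _ from singer where _ | select name from singer where age > 30"
# keeps only the text after the "|", which sqlparse reflows to:
#   SELECT name
#   FROM singer
#   WHERE age > 30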

model_name = os.getenv("HF_MODEL_NAME", None)
tok = AutoTokenizer.from_pretrained(model_name)

max_new_tokens = 1024

print(f"Starting to load the model {model_name}")

m = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=0,
)

# Use the EOS token for padding, since the model config defines no pad token.
m.config.pad_token_id = m.config.eos_token_id
m.generation_config.pad_token_id = m.config.eos_token_id

print(f"Successfully loaded the model {model_name} into memory")

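
# generate() assembles an instruction-style prompt, launches model.generate on
# a background thread, and concatenates tokens from a TextIteratorStreamer
# into the final response before optional formatting and logging.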
def generate(input_message: str, db_info="", temperature=0.2, top_p=0.9, top_k=0, repetition_penalty=1.08, format_sql=True, log=False, num_return_sequences=1, num_beams=1, do_sample=False):
    # Stop generation as soon as the model emits the "###" section delimiter.
    stop_token_ids = tok.convert_tokens_to_ids(["###"])

    class StopOnTokens(StoppingCriteria):
        def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
            for stop_id in stop_token_ids:
                if input_ids[0][-1] == stop_id:
                    return True
            return False

    stop = StopOnTokens()

    messages = f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n\nConvert text to sql: {input_message} {db_info}\n\n### Response:\n\n"

    input_ids = tok(messages, return_tensors="pt").input_ids
    input_ids = input_ids.to(m.device)
    streamer = TextIteratorStreamer(tok, timeout=1000.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
        stopping_criteria=StoppingCriteriaList([stop]),
        num_return_sequences=num_return_sequences,
        num_beams=num_beams,
        do_sample=do_sample,
    )

    stream_complete = Event()

    def generate_and_signal_complete():
        m.generate(**generate_kwargs)
        stream_complete.set()

    # Generate on a background thread so tokens can be consumed from the
    # streamer as they are produced.
    t1 = Thread(target=generate_and_signal_complete)
    t1.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text

    output = format_query(partial_text) if format_sql else partial_text

    if log:
        log_message_to_firestore(input_message, db_info, temperature, output)

    return output

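
# Build the Gradio UI: output code box, rating buttons, question/schema inputs,
# generation options, examples, and the model readme.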
with gr.Blocks(theme='gradio/soft') as demo:
    header = gr.HTML("""
        <h1 style="text-align: center">SQL Skeleton WizardCoder Demo</h1>
        <h3 style="text-align: center">🕷️☠️🧙‍♂️ Generate SQL queries from Natural Language 🕷️☠️🧙‍♂️</h3>
        <div style="max-width: 450px; margin: auto; text-align: center">
            <p style="font-size: 12px; text-align: center">⚠️ Generation should take 30-60s. Please rate the response; it helps a lot. If you get a blank output, the model server is currently down; please try again later.</p>
        </div>
    """)

    output_box = gr.Code(label="Generated SQL", lines=2, interactive=False)

    with gr.Row():
        rate_up = gr.Button("👍", variant="secondary")
        rate_down = gr.Button("👎", variant="secondary")

    input_text = gr.Textbox(lines=3, placeholder='Write your question here...', label='NL Input')
    db_info = gr.Textbox(lines=4, placeholder='Make sure to place your tables information inside || for better results. Example: | table_01 : column_01 , column_02 | table_02 : column_01 , column_02 | ...', label='Database Info')
    format_sql = gr.Checkbox(label="Format SQL + Remove Skeleton", value=True, interactive=True)
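
    # The schema string format (see the examples below): pipe-delimited tables
    # as "| table : col1 , col2 |", plus foreign-key pairs like "a.col = b.col".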
    with gr.Row():
        run_button = gr.Button("Generate SQL", variant="primary")
        clear_button = gr.ClearButton(variant="secondary")

    with gr.Accordion("Options", open=False):
        temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.2, step=0.1)
        top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.0, maximum=1.0, value=0.9, step=0.01)
        top_k = gr.Slider(label="Top-k", minimum=0, maximum=200, value=0, step=1)
        repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.08, step=0.01)

    with gr.Accordion("Generation strategies", open=False):
        num_return_sequences = gr.Slider(label="Num Return Sequences", minimum=1, maximum=5, value=1, step=1)
        num_beams = gr.Slider(label="Num Beams", minimum=1, maximum=5, value=1, step=1)
        do_sample = gr.Checkbox(label="Do Sample", value=False, interactive=True)

    info = gr.HTML(f"""
        <p>Leveraging the <a href='https://huggingface.co/{quantized_model}'><strong>bitsandbytes 8-bit version</strong></a> of the <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a> model.</p>
        <p>How it's made: <a href='https://huggingface.co/{initial_model}'><strong>{initial_model}</strong></a> was fine-tuned to create <a href='https://huggingface.co/{lora_model}'><strong>{lora_model}</strong></a>, which was then merged with the base model to create <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a>.</p>
        <p>Fine-tuning was performed with QLoRA on the <a href='https://huggingface.co/datasets/{dataset}'><strong>{dataset}</strong></a> dataset. Training metrics are available on the <a href='https://huggingface.co/{lora_model}'><strong>QLoRA adapter HF repo</strong></a>.</p>
        <p>All inputs and outputs are logged to Firebase to track how the model is doing. You can also rate each generated SQL query; ratings are sent to the database as well.</p>
    """)

    # Cache example outputs only on the deployment host, not on local dev machines.
    examples = gr.Examples([
        ["What is the average, minimum, and maximum age of all singers from France?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
        ["How many students have dogs?", "| student : stuid , lname , fname , age , sex , major , advisor , city_code | has_pet : stuid , petid | pets : petid , pettype , pet_age , weight | has_pet.stuid = student.stuid | has_pet.petid = pets.petid | pets.pettype = 'Dog' |"],
    ], inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty, format_sql], fn=generate, cache_examples=platform.system() not in ("Windows", "Darwin"), outputs=output_box)

    with gr.Accordion("More Examples", open=False):
        more_examples = gr.Examples([
            ["What is the average weight of pets of all students?", "| student : stuid , lname , fname , age , sex , major , advisor , city_code | has_pet : stuid , petid | pets : petid , pettype , pet_age , weight | has_pet.stuid = student.stuid | has_pet.petid = pets.petid |"],
            ["How many male singers performed in concerts in the year 2023?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["For students who have pets, how many pets does each student have? List their ids instead of names.", "| student : stuid , lname , fname , age , sex , major , advisor , city_code | has_pet : stuid , petid | pets : petid , pettype , pet_age , weight | has_pet.stuid = student.stuid | has_pet.petid = pets.petid |"],
            ["Show location and name for all stadiums with a capacity between 5000 and 10000.", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["What are the number of concerts that occurred in the stadium with the largest capacity ?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["Which student has the oldest pet?", "| student : stuid , lname , fname , age , sex , major , advisor , city_code | has_pet : stuid , petid | pets : petid , pettype , pet_age , weight | has_pet.stuid = student.stuid | has_pet.petid = pets.petid |"],
            ["List the names of all singers who performed in a concert with the theme 'Rock'", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["List all students who don't have pets.", "| student : stuid , lname , fname , age , sex , major , advisor , city_code | has_pet : stuid , petid | pets : petid , pettype , pet_age , weight | has_pet.stuid = student.stuid | has_pet.petid = pets.petid |"],
        ], inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty, format_sql], fn=generate, cache_examples=False, outputs=output_box)

    # Pull the model card and strip its YAML front matter before rendering.
    readme_content = requests.get(f"https://huggingface.co/{merged_model}/raw/main/README.md").text
    readme_content = re.sub('---.*?---', '', readme_content, count=1, flags=re.DOTALL)

    with gr.Accordion("Model Readme", open=True):
        readme = gr.Markdown(readme_content)

    with gr.Accordion("Disabled Options:", open=False):
        log = gr.Checkbox(label="Log to Firebase", value=True, interactive=False)

    run_button.click(fn=generate, inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty, format_sql, log, num_return_sequences, num_beams, do_sample], outputs=output_box, api_name="txt2sql")
    clear_button.add([input_text, db_info, output_box])

    # Buttons passed as inputs supply their label ("👍" or "👎") as the rating value.
    rate_up.click(fn=log_rating_to_firestore, inputs=[input_text, db_info, temperature, output_box, rate_up])
    rate_down.click(fn=log_rating_to_firestore, inputs=[input_text, db_info, temperature, output_box, rate_down])

demo.queue(concurrency_count=1, max_size=20).launch(debug=True)
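
# A minimal sketch of calling the "txt2sql" endpoint remotely with
# gradio_client, assuming the app is reachable at a hypothetical address:
#
#   from gradio_client import Client
#   client = Client("http://localhost:7860")  # hypothetical URL
#   sql = client.predict(
#       "How many students have dogs?",  # input_text
#       "| student : stuid , ... |",     # db_info (schema string, elided here)
#       0.2, 0.9, 0, 1.08,               # temperature, top_p, top_k, repetition_penalty
#       True, True, 1, 1, False,         # format_sql, log, num_return_sequences, num_beams, do_sample
#       api_name="/txt2sql",
#   )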