|
import os |
|
import gradio as gr |
|
import sqlparse |
|
import requests |
|
from time import sleep |
|
import re |
|
import platform |
|
|
|
import firebase_admin |
|
from firebase_admin import credentials, firestore |
|
import json |
|
import base64 |
|
|
|
# Report which operating system the app is running on.
print(f"Running on {platform.system()}")

# On Windows and macOS, pull environment variables from a .env file.
# python-dotenv is imported lazily so it is only required on those platforms.
if platform.system() in ("Windows", "Darwin"):
    from dotenv import load_dotenv

    load_dotenv()

# Hugging Face repository ids; they are interpolated into huggingface.co
# links in the info panel and into the README download URL further down.
quantized_model = "richardr1126/spider-skeleton-wizard-coder-ggml"
merged_model = "richardr1126/spider-skeleton-wizard-coder-merged"
initial_model = "WizardLM/WizardCoder-15B-V1.0"
lora_model = "richardr1126/spider-skeleton-wizard-coder-qlora"
dataset = "richardr1126/spider-skeleton-context-instruct"
|
|
|
def log_to_firestore(input_message, db_info, temperature, response_text):
    """Write one request/response record to the Firestore ``logs`` collection.

    The Firebase service-account JSON is read from the ``FIREBASE``
    environment variable, where it is stored base64-encoded. The default
    Firebase app is initialised on the first call only and reused afterwards;
    calling ``firebase_admin.initialize_app`` a second time for the default
    app raises ``ValueError``, which previously made every call after the
    first one fail.

    Args:
        input_message: The natural-language question sent to the model.
        db_info: The database schema description sent with the question.
        temperature: Sampling temperature used for the request.
        response_text: The text returned to the user.

    Raises:
        RuntimeError: If the ``FIREBASE`` environment variable is not set.
        Exception: Any error raised while decoding the credentials or by the
            Firebase SDK is propagated to the caller.
    """
    try:
        # Raises ValueError when the default app has not been created yet.
        firebase_admin.get_app()
    except ValueError:
        base64_string = os.getenv('FIREBASE')
        if not base64_string:
            raise RuntimeError(
                "FIREBASE environment variable is not set; cannot log to Firestore"
            )

        # FIREBASE holds the service-account JSON, base64-encoded.
        json_bytes = base64.b64decode(base64_string.encode('utf-8'))
        firebase_auth = json.loads(json_bytes.decode('utf-8'))

        cred = credentials.Certificate(firebase_auth)
        firebase_admin.initialize_app(cred)

    db = firestore.client()

    # document() without an id lets Firestore generate one for the new entry.
    doc_ref = db.collection('logs').document()
    log_data = {
        'timestamp': firestore.SERVER_TIMESTAMP,
        'temperature': temperature,
        'db_info': db_info,
        'input': input_message,
        'output': response_text,
    }
    doc_ref.set(log_data)
|
|
|
|
|
def format(text):
    """Extract the SQL query from a model response and pretty-print it.

    The response is split on stand-alone ``|`` characters and the second
    segment is taken as the query (presumably the response has the shape
    ``<skeleton> | <query>``, per the "Remove Skeleton" UI option -- confirm
    against the model's output format). A ``|`` that is part of SQL's ``||``
    concatenation operator is NOT treated as a separator; the previous plain
    ``split("|")`` cut such responses in the wrong place. When there is no
    stand-alone ``|`` the whole text is used unchanged.

    NOTE: this function shadows the builtin ``format``; the name is kept
    because other code in this file calls it.

    Args:
        text: Raw model response.

    Returns:
        The query as a string, re-indented with upper-cased keywords when
        sqlparse can process it, otherwise the unformatted query.
    """
    final_query = text
    if isinstance(text, str):
        # A separator is a "|" with no "|" directly before or after it.
        segments = re.split(r"(?<!\|)\|(?!\|)", text)
        if len(segments) > 1:
            final_query = segments[1].strip()

    try:
        formatted_query = sqlparse.format(final_query, reindent=True, keyword_case='upper')
    except Exception:
        # Deliberate best effort: formatting is cosmetic, so any failure
        # falls back to the unformatted query instead of losing the result.
        formatted_query = final_query

    return f"{formatted_query}"
|
|
|
|
|
|
|
def generate(input_message: str, db_info="", temperature=0.1, top_p=0.9, top_k=0, repetition_penalty=1.08, format_sql=True, stop_sequence="###", log=False):
    """Turn a natural-language question into SQL via the remote generation API.

    Builds an instruction prompt from the question and schema, POSTs it to
    the generation endpoint (up to three attempts), post-processes the
    returned text, logs the exchange to Firestore and returns the result.

    Args:
        input_message: The natural-language question.
        db_info: Schema description appended to the question in the prompt.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        top_k: Top-k value forwarded to the API.
        repetition_penalty: Forwarded to the API as ``rep_pen``.
        format_sql: When true, return the output of ``format`` (skeleton
            removed, SQL pretty-printed); otherwise return the cleaned raw
            response text.
        stop_sequence: Comma-separated extra stop sequences; ``"###"`` is
            always included.
        log: Currently unused -- every successful generation is logged to
            Firestore regardless of this flag.

    Returns:
        The generated SQL string, or ``None`` when all attempts failed.
    """
    messages = f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n\nConvert text to sql: {input_message} {db_info}\n\n### Response:\n\n"

    url = "https://e9f4be879d38-8269039109365193683.ngrok-free.app/api/v1/generate"

    # Always stop on "###"; add the user's comma-separated extras, dropping
    # blank entries (an empty textbox used to yield an empty-string stop
    # token), surrounding whitespace and duplicates.
    stop = ["###"]
    for candidate in (stop_sequence or "").split(","):
        candidate = candidate.strip()
        if candidate and candidate not in stop:
            stop.append(candidate)

    payload = {
        "prompt": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "top_a": 0,
        "n": 1,
        "max_context_length": 2048,
        "max_length": 256,
        "rep_pen": repetition_penalty,
        "sampler_order": [6, 0, 1, 3, 4, 2, 5],
        "stop_sequence": stop,
    }
    headers = {
        "Content-Type": "application/json",
        "ngrok-skip-browser-warning": "1",
    }

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            # Generous read timeout: generation is slow, but a hung
            # connection must not block the single-worker queue forever.
            response = requests.post(url, json=payload, headers=headers, timeout=(10, 600))
            response.raise_for_status()
            response_text = response.json()["results"][0]["text"]

            # Flatten to one line. Newlines become spaces (not nothing) so
            # tokens on adjacent lines are not glued together.
            response_text = response_text.replace("\n", " ").replace("\t", " ").strip()
            if response_text.endswith("."):
                response_text = response_text[:-1]

            formatted_query = format(response_text)
        except Exception as e:
            # Broad on purpose: any network, HTTP or parsing failure is retried.
            print(f'Error occurred: {str(e)}')
            if attempt < max_attempts:
                print('Waiting for 10 seconds before retrying...')
                sleep(10)
            continue

        result = formatted_query if format_sql else response_text

        # Logging is best effort: a Firestore failure must neither discard a
        # successful generation nor trigger another (slow) generation attempt.
        try:
            log_to_firestore(input_message, db_info, temperature, result)
        except Exception as e:
            print(f'Logging to Firestore failed: {str(e)}')

        return result

    # Every attempt failed.
    return None
|
|
|
|
|
def _load_model_readme(model_id):
    """Return the model card Markdown for *model_id* without its front matter.

    Only a front-matter block at the very start of the file (delimited by
    lines consisting of ``---``) is removed. The previous pattern
    ``---.*?---`` was applied to the whole document and therefore also
    deleted later ``---`` ... ``---`` spans such as Markdown table
    separators. If the download fails, a short placeholder is returned so
    the app can still start.
    """
    readme_url = f"https://huggingface.co/{model_id}/raw/main/README.md"
    try:
        response = requests.get(readme_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f'Could not load model README: {str(e)}')
        return f"Model README is currently unavailable. See https://huggingface.co/{model_id}"

    return re.sub(
        r'\A\s*---[ \t]*\r?\n.*?^---[ \t]*$',
        '',
        response.text,
        count=1,
        flags=re.DOTALL | re.MULTILINE,
    )


# Schema descriptions shared by several example rows below.
_CONCERT_DB_INFO = "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"
_PETS_DB_INFO = "| student : stuid , lname , fname , age , sex , major , advisor , city_code | has_pet : stuid , petid | pets : petid , pettype , pet_age , weight | has_pet.stuid = student.stuid | has_pet.petid = pets.petid |"

# NOTE(review): the emoji inside the HTML/label strings below appear to be
# mis-encoded in this copy of the file; they are reproduced as found --
# confirm against the original source before changing them.
with gr.Blocks(theme='gradio/soft') as demo:
    # Page header.
    header = gr.HTML("""
        <h1 style="text-align: center">SQL Skeleton WizardCoder Demo</h1>
        <h3 style="text-align: center">π·οΈβ οΈπ§ββοΈ Generate SQL queries from Natural Language π·οΈβ οΈπ§ββοΈ</h3>
    """)

    # Result area followed by the two inputs (question and schema).
    output_box = gr.Code(label="Generated SQL", lines=2, interactive=True)
    note = gr.HTML("""<p style="font-size: 12px; text-align: center">β οΈ Should take 30-60s to generate</p>""")
    input_text = gr.Textbox(lines=3, placeholder='Write your question here...', label='NL Input')
    db_info = gr.Textbox(lines=4, placeholder='Example: | table_01 : column_01 , column_02 | table_02 : column_01 , column_02 | ...', label='Database Info')

    # Generation settings, collapsed by default.
    with gr.Accordion("Options", open=False):
        temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
        top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.0, maximum=1.0, value=0.9, step=0.01)
        top_k = gr.Slider(label="Top-k", minimum=0, maximum=200, value=0, step=1)
        repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.08, step=0.01)
        format_sql = gr.Checkbox(label="Format SQL + Remove Skeleton", value=True, interactive=True)
        stop_sequence = gr.Textbox(lines=1, value="Explanation,Note", label='Extra Stop Sequence')

    run_button = gr.Button("Generate SQL", variant="primary")

    # The eight inputs map positionally onto generate()'s first eight parameters.
    run_button.click(fn=generate, inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty, format_sql, stop_sequence], outputs=output_box, api_name="txt2sql")

    # Background information with links built from the module-level model ids.
    # (A stray closing </a> tag and doubled period were removed from the last paragraph.)
    info = gr.HTML(f"""
        <p>π Leveraging the <a href='https://huggingface.co/{quantized_model}'><strong>4-bit GGML version</strong></a> of <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a> model.</p>
        <p>π How it's made: <a href='https://huggingface.co/{initial_model}'><strong>{initial_model}</strong></a> was finetuned to create <a href='https://huggingface.co/{lora_model}'><strong>{lora_model}</strong></a>, then merged together to create <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a>.</p>
        <p>π Fine-tuning was performed using QLoRA techniques on the <a href='https://huggingface.co/datasets/{dataset}'><strong>{dataset}</strong></a> dataset. You can view training metrics on the <a href='https://huggingface.co/{lora_model}'><strong>QLoRa adapter HF Repo</strong></a>.</p>
        <p>π All inputs/outputs are logged to Firebase, to help me see where the model still needs improvements.</p>
    """)

    # Each example row supplies only the question and the schema.
    # Example caching is switched off on Windows and macOS.
    with gr.Accordion("Examples", open=True):
        examples = gr.Examples([
            ["What is the average, minimum, and maximum age of all singers from France?", _CONCERT_DB_INFO],
            ["How many students have dogs?", _PETS_DB_INFO + " pets.pettype = 'Dog' |"],
            ["What is the average weight of pets of all students?", _PETS_DB_INFO],
            ["How many male singers performed in concerts in the year 2023?", _CONCERT_DB_INFO],
        ], inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty, format_sql, stop_sequence], fn=generate, cache_examples=platform.system() not in ("Windows", "Darwin"), outputs=output_box)

    with gr.Accordion("More Examples", open=False):
        examples = gr.Examples([
            ["For students who have pets, how many pets does each student have? List their ids instead of names.", _PETS_DB_INFO],
            ["Show location and name for all stadiums with a capacity between 5000 and 10000.", _CONCERT_DB_INFO],
            ["What are the number of concerts that occurred in the stadium with the largest capacity ?", _CONCERT_DB_INFO],
            ["Which student has the oldest pet?", _PETS_DB_INFO],
            ["List the names of all singers who performed in a concert with the theme 'Rock'", _CONCERT_DB_INFO],
            ["List all students who don't have pets.", _PETS_DB_INFO],
        ], inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty, format_sql, stop_sequence], fn=generate, cache_examples=False, outputs=output_box)

    # Model card, downloaded once while the UI is being built.
    readme_content = _load_model_readme(merged_model)

    with gr.Accordion("π Model Readme", open=True):
        readme = gr.Markdown(
            readme_content,
        )
|
|
|
# Enable the request queue (concurrency_count=1, max_size=20), then start
# the app with debug output enabled.
(
    demo
    .queue(concurrency_count=1, max_size=20)
    .launch(debug=True)
)