File size: 7,874 Bytes
40c895f
 
 
 
 
 
 
 
 
 
 
 
66983ee
40c895f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f23744
40c895f
 
ba3b60e
5f23744
40c895f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d119dc4
 
66983ee
 
 
d119dc4
40c895f
 
 
 
66983ee
ba3b60e
40c895f
 
 
 
 
b725df4
89ed098
 
b725df4
1a7b39d
 
 
 
 
89ed098
40c895f
b725df4
ba3b60e
 
 
b725df4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
from threading import Event, Thread
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)
from huggingface_hub import login
import gradio as gr
import torch
import markdown

# Authenticate with the Hugging Face Hub using the HF_TOKEN environment variable.
# NOTE(review): when HF_TOKEN is unset this passes None to login(); confirm that
# is acceptable in the deployment environment (it may try to prompt for a token).
login(os.getenv("HF_TOKEN", None))

# Hub repository id used for both the tokenizer and the model weights.
model_name = "richardr1126/spider-natsql-wizard-coder-8bit"
tok = AutoTokenizer.from_pretrained(model_name)

# Upper bound on generated tokens per request; forwarded to m.generate() by bot().
max_new_tokens = 1536

print(f"Starting to load the model {model_name}")

# Load the causal LM with 8-bit loading enabled.
# NOTE(review): device_map=0 presumably pins the whole model to GPU 0 - confirm.
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=0,
    load_in_8bit=True,
)

# Reuse the end-of-sequence id as the padding id, on both the model config and
# the generation config (presumably because no dedicated pad token is defined).
m.config.pad_token_id = m.config.eos_token_id
m.generation_config.pad_token_id = m.config.eos_token_id

# Token strings that should end generation. They are converted to vocabulary
# ids once here and compared against the newest token by StopOnTokens.
# NOTE(review): this assumes each string is a single token in the vocabulary - verify.
stop_tokens = [";", "###", "Result"]
stop_token_ids = tok.convert_tokens_to_ids(stop_tokens)

print(f"Successfully loaded the model {model_name} into memory")

class StopOnTokens(StoppingCriteria):
    """Stopping criterion that halts generation when the newest token is a stop token."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True if the last token of the first sequence is in ``stop_token_ids``.

        Only row 0 of ``input_ids`` is inspected; ``scores`` and any extra
        keyword arguments are ignored.
        """
        newest_token = input_ids[0][-1]
        return any(newest_token == candidate for candidate in stop_token_ids)

def bot(input_message: str, db_info="", temperature=0.1, top_p=0.9, top_k=0, repetition_penalty=1.08):
    """Generate a SQL query for a natural-language question and return it as HTML.

    The question and optional schema text are placed into the instruction
    prompt, the model output is streamed from a background thread, and the
    text after the last "|" in that output is rendered as a fenced ``sql``
    code block.

    Args:
        input_message: Natural-language question to convert to SQL.
        db_info: Optional database/schema description appended after the question.
        temperature: Sampling temperature; sampling is enabled only when it is > 0.
        top_p: Nucleus-sampling value forwarded to ``generate``.
        top_k: Top-k value forwarded to ``generate``.
        repetition_penalty: Repetition penalty forwarded to ``generate``.

    Returns:
        The HTML string produced by ``markdown.markdown`` for the final query.

    Raises:
        Exception: Whatever ``m.generate`` raised in the worker thread is
            re-raised here once the stream has been drained.
    """
    # Format the user's input message
    messages = f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n\nConvert text to sql: {input_message} {db_info}\n\n### Response:\n\n"

    encoded = tok(messages, return_tensors="pt").to(m.device)
    streamer = TextIteratorStreamer(tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=encoded.input_ids,
        # pad and eos share an id, so pass the tokenizer's mask explicitly
        # instead of leaving generate() to guess it from the input ids.
        attention_mask=encoded.get("attention_mask"),
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=temperature > 0.0,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
        stopping_criteria=StoppingCriteriaList([StopOnTokens()]),
    )

    generation_errors = []

    def run_generation():
        try:
            m.generate(**generate_kwargs)
        except Exception as exc:
            # Hand the failure to the caller and close the stream so the
            # consumer below stops waiting instead of hitting the timeout.
            generation_errors.append(exc)
            streamer.end()

    worker = Thread(target=run_generation)
    worker.start()

    # Drain the stream; iteration ends when the streamer signals completion.
    generated_text = "".join(streamer)
    worker.join()
    if generation_errors:
        raise generation_errors[0]

    # Split the text by "|", and get the last element in the list which should be the final query
    final_query = generated_text.split("|")[-1].strip()
    final_query_markdown = f'```sql\n{final_query}\n```'
    # The fenced_code extension is required for the ``` fence to become a code
    # block; without it the fence is parsed as inline code and the "sql"
    # language tag ends up in the rendered text.
    return markdown.markdown(final_query_markdown, extensions=["fenced_code"])


# Schema description used by the bundled examples and shown as the placeholder
# of the database-info textbox. It keeps the trailing "| " of the original text.
_EXAMPLE_DB_INFO = "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id | "

# Plain questions only: bot() adds the instruction template itself, so the
# examples must not embed it again.
_EXAMPLE_QUESTIONS = [
    "What is the average, minimum, and maximum age for all French singers?",
    "Show location and name for all stadiums with a capacity between 5000 and 10000.",
    "What are the number of concerts that occurred in the stadium with the largest capacity ?",
    "How many male singers performed in concerts in the year 2023?",
    "List the names of all singers who performed in a concert with the theme 'Rock'",
]

# Web UI: two textboxes (question, schema) plus four sliders, passed to bot()
# positionally in this order.
gradio_interface = gr.Interface(
    fn=bot,
    inputs=[
        gr.Textbox(lines=20, placeholder='Input text here...', label='Input Text'),
        gr.Textbox(lines=20, placeholder=f'(Recommended) Example: {_EXAMPLE_DB_INFO}', label='Database Info'),
        gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.1, step=0.1),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.0, maximum=1.0, value=0.9, step=0.01),
        gr.Slider(label="Top-k", minimum=0, maximum=200, value=0, step=1),
        gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.08, step=0.1),
    ],
    outputs=gr.Markdown(),
    title="SQL Skeleton WizardCoder Demo",
    description="""This interactive tool translates natural language instructions into SQL queries, using a trained model. Type or paste your instructions into the text box and click 'Submit' to generate SQL queries. Use the sliders to adjust the model's temperature, top-p, top-k, and repetition penalty values.""",
    # Each example fills the first two inputs (question, schema); the sliders
    # keep their default values.
    examples=[[question, _EXAMPLE_DB_INFO] for question in _EXAMPLE_QUESTIONS],
)

# Launch once; the original called launch() a second time after the first returned.
gradio_interface.launch()