Staticaliza committed
Commit · d03ac46 · 1 Parent(s): c932dfe
Update app.py
app.py CHANGED
@@ -2,11 +2,12 @@ import gradio as gr
 from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
 
+repo_id = "TheBloke/openchat_3.5-GGUF"
 model_name = "openchat_3.5.Q4_K_M.gguf"
-hf_hub_download(repo_id="TheBloke/openchat_3.5-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 
+hf_hub_download(repo_id=repo_id, filename=model_name, local_dir="models", local_dir_use_symlinks=False)
+
+model = GPT4All(model_name, "models", allow_download = False, device="cpu")
 
 model.config["promptTemplate"] = "[INST] {0} [/INST]"
 model.config["systemPrompt"] = ""
@@ -14,58 +15,61 @@ model._is_chat_session_activated = False
 
 max_new_tokens = 2048
 
-def generater(
-    prompt = "<s>"
-    for user_message, assistant_message in history:
-        prompt += model.config["promptTemplate"].format(user_message)
-        prompt += assistant_message + "</s>"
-    prompt += model.config["promptTemplate"].format(message)
-    outputs = []
-    for token in model.generate(prompt=prompt, temp=temperature, top_k = top_k, top_p = top_p, max_tokens = max_new_tokens, streaming=True):
-        outputs.append(token)
-        yield "".join(outputs)
-
-chatbot = gr.Chatbot()
-        minimum=0,
-        maximum=1000,
-        step=1,
-        interactive=True,
-        info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
-    )
-]
-    chatbot=chatbot,
-    additional_inputs=additional_inputs,
-)
+def generater(input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens):
+    history = history or []
+    formatted_input = "<s>"
+
+    for user_message, assistant_message in history:
+        formatted_input += model.config["promptTemplate"].format(user_message)
+        formatted_input += assistant_message + "</s>"
+
+    formatted_input += model.config["promptTemplate"].format(input)
+
+    result = model.generate(
+        formatted_input,
+        temperature = temperature,
+        max_new_tokens = max_tokens,
+        top_p = top_p,
+        top_k = top_k,
+        repetition_penalty = rep_p,
+        stop_sequences = stops,
+        do_sample = True,
+        seed = seed,
+        stream = False,
+        details = False,
+        return_full_text = False
+    )
+
+    history = history + [[input, result]]
+
+    return (result, input, history)
 
 with gr.Blocks() as demo:
+    with gr.Row(variant = "panel"):
+        gr.Markdown("An AI model test demo.")
+
+    with gr.Row():
+        with gr.Column():
+            history = gr.Chatbot(label = "History", elem_id = "chatbot")
+            input = gr.Textbox(label = "Input", value = "", lines = 2)
+            instruction = gr.Textbox(label = "Instruction", value = "You are an AI chatbot.", lines = 4)
+            run = gr.Button("▶")
+            clear = gr.Button("🗑️")
+
+        with gr.Column():
+            temperature = gr.Slider(minimum = 0, maximum = 2, value = 1, step = 0.01, interactive = True, label = "Temperature")
+            top_p = gr.Slider(minimum = 0.01, maximum = 0.99, value = 0.95, step = 0.01, interactive = True, label = "Top P")
+            top_k = gr.Slider(minimum = 1, maximum = 2048, value = 50, step = 1, interactive = True, label = "Top K")
+            rep_p = gr.Slider(minimum = 0.01, maximum = 2, value = 1.2, step = 0.01, interactive = True, label = "Repetition Penalty")
+            max_tokens = gr.Slider(minimum = 1, maximum = 2048, value = 32, step = 64, interactive = True, label = "Max New Tokens")
+
+    with gr.Row():
+        with gr.Column():
+            output = gr.Textbox(label = "Output", value = "", lines = 50)
 
+    run.click(generater, inputs = [input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens], outputs = [output, input, history])
+    clear.click(clear_history, [], history)
+    cloud.click(maintain_cloud, inputs = [], outputs = [input, output])
+
+demo.queue(concurrency_count = 500, api_open = True).launch(show_api = True)
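
Note that the new generater passes text-generation-inference style keyword arguments (temperature, max_new_tokens, repetition_penalty, stop_sequences, do_sample, seed, stream, details, return_full_text) to GPT4All.generate, while the removed version of the function used the gpt4all Python bindings' own parameter names (temp, top_k, top_p, max_tokens, streaming). Below is a minimal sketch of the same function written against that binding API; repeat_penalty is an assumption about the installed gpt4all version, and the stop/seed/sampling switches have no direct equivalent in GPT4All.generate, so they are dropped here.

def generater(input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens):
    # Sketch only: reuses the `model` loaded above and maps the slider values
    # onto keyword arguments the gpt4all bindings actually expose.
    history = history or []
    formatted_input = "<s>"

    for user_message, assistant_message in history:
        formatted_input += model.config["promptTemplate"].format(user_message)
        formatted_input += assistant_message + "</s>"

    formatted_input += model.config["promptTemplate"].format(input)

    result = model.generate(
        formatted_input,
        temp = temperature,
        top_p = top_p,
        top_k = top_k,
        repeat_penalty = rep_p,   # assumption: supported by the installed gpt4all version
        max_tokens = max_tokens,
        streaming = False,
    )

    history = history + [[input, result]]
    return (result, input, history)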
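
The wiring at the bottom of the new file also references names that are never defined in this diff: stops, seed, clear_history, maintain_cloud, and a cloud button. A purely hypothetical sketch of what those missing definitions could look like, not part of the actual commit:

# Hypothetical fill-ins for names the committed code references but never defines.
stops = ["</s>"]   # hypothetical stop sequence matching the prompt template
seed = 42          # hypothetical fixed seed

def clear_history():
    # return an empty list to reset the Chatbot component
    return []

def maintain_cloud():
    # placeholder keep-alive handler: clears the Input and Output textboxes
    return "", ""

# A `cloud` button would also need to be created inside the Blocks layout, e.g.:
# cloud = gr.Button("☁️")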