Staticaliza committed
Commit · d03ac46 · 1 Parent(s): c932dfe
Update app.py
app.py CHANGED
@@ -2,11 +2,12 @@ import gradio as gr
 from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
 
+repo_id = "TheBloke/openchat_3.5-GGUF"
 model_name = "openchat_3.5.Q4_K_M.gguf"
-hf_hub_download(repo_id="TheBloke/openchat_3.5-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 
+hf_hub_download(repo_id=repo_id, filename=model_name, local_dir="models", local_dir_use_symlinks=False)
+
+model = GPT4All(model_name, "models", allow_download = False, device="cpu")
 
 model.config["promptTemplate"] = "[INST] {0} [/INST]"
 model.config["systemPrompt"] = ""
@@ -14,58 +15,61 @@ model._is_chat_session_activated = False
 
 max_new_tokens = 2048
 
-def generater(
-    prompt = "<s>"
-    for user_message, assistant_message in history:
-        prompt += model.config["promptTemplate"].format(user_message)
-        prompt += assistant_message + "</s>"
-    prompt += model.config["promptTemplate"].format(message)
-    outputs = []
-    for token in model.generate(prompt=prompt, temp=temperature, top_k = top_k, top_p = top_p, max_tokens = max_new_tokens, streaming=True):
-        outputs.append(token)
-        yield "".join(outputs)
-
-chatbot = gr.Chatbot()
-        minimum=0,
-        maximum=1000,
-        step=1,
-        interactive=True,
-        info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
-    )
-]
-    chatbot=chatbot,
-    additional_inputs=additional_inputs,
-)
+def generater(input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens):
+    history = history or []
+    formatted_input = "<s>"
+
+    for user_message, assistant_message in history:
+        formatted_input += model.config["promptTemplate"].format(user_message)
+        formatted_input += assistant_message + "</s>"
+
+    formatted_input += model.config["promptTemplate"].format(input)
+
+    result = model.generate(
+        formatted_input,
+        temperature = temperature,
+        max_new_tokens = max_tokens,
+        top_p = top_p,
+        top_k = top_k,
+        repetition_penalty = rep_p,
+        stop_sequences = stops,
+        do_sample = True,
+        seed = seed,
+        stream = False,
+        details = False,
+        return_full_text = False
+    )
+
+    history = history + [[input, result]]
+
+    return (result, input, history)
 
 with gr.Blocks() as demo:
+    with gr.Row(variant = "panel"):
+        gr.Markdown("An AI model test demo.")
+
+    with gr.Row():
+        with gr.Column():
+            history = gr.Chatbot(label = "History", elem_id = "chatbot")
+            input = gr.Textbox(label = "Input", value = "", lines = 2)
+            instruction = gr.Textbox(label = "Instruction", value = "You are an AI chatbot.", lines = 4)
+            run = gr.Button("▶")
+            clear = gr.Button("🗑️")
+
+        with gr.Column():
+            temperature = gr.Slider(minimum = 0, maximum = 2, value = 1, step = 0.01, interactive = True, label = "Temperature")
+            top_p = gr.Slider(minimum = 0.01, maximum = 0.99, value = 0.95, step = 0.01, interactive = True, label = "Top P")
+            top_k = gr.Slider(minimum = 1, maximum = 2048, value = 50, step = 1, interactive = True, label = "Top K")
+            rep_p = gr.Slider(minimum = 0.01, maximum = 2, value = 1.2, step = 0.01, interactive = True, label = "Repetition Penalty")
+            max_tokens = gr.Slider(minimum = 1, maximum = 2048, value = 32, step = 64, interactive = True, label = "Max New Tokens")
+
+    with gr.Row():
+        with gr.Column():
+            output = gr.Textbox(label = "Output", value = "", lines = 50)
 
+    run.click(generater, inputs = [input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens], outputs = [output, input, history])
+    clear.click(clear_history, [], history)
+    cloud.click(maintain_cloud, inputs = [], outputs = [input, output])
+
+demo.queue(concurrency_count = 500, api_open = True).launch(show_api = True)
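
Note that the new generater passes text-generation-inference style keyword arguments (temperature, max_new_tokens, repetition_penalty, stop_sequences, do_sample, seed, stream, details, return_full_text) to GPT4All.generate, while the removed version of the function used the gpt4all Python bindings' own parameter names (temp, top_k, top_p, max_tokens, streaming). Below is a minimal sketch of the same function written against that binding API; repeat_penalty is an assumption about the installed gpt4all version, and the stop/seed/sampling switches have no direct equivalent in GPT4All.generate, so they are dropped here.

def generater(input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens):
    # Sketch only: reuses the `model` loaded above and maps the slider values
    # onto keyword arguments the gpt4all bindings actually expose.
    history = history or []
    formatted_input = "<s>"

    for user_message, assistant_message in history:
        formatted_input += model.config["promptTemplate"].format(user_message)
        formatted_input += assistant_message + "</s>"

    formatted_input += model.config["promptTemplate"].format(input)

    result = model.generate(
        formatted_input,
        temp = temperature,
        top_p = top_p,
        top_k = top_k,
        repeat_penalty = rep_p,   # assumption: supported by the installed gpt4all version
        max_tokens = max_tokens,
        streaming = False,
    )

    history = history + [[input, result]]
    return (result, input, history)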
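
The wiring at the bottom of the new file also references names that are never defined in this diff: stops, seed, clear_history, maintain_cloud, and a cloud button. A purely hypothetical sketch of what those missing definitions could look like, not part of the actual commit:

# Hypothetical fill-ins for names the committed code references but never defines.
stops = ["</s>"]   # hypothetical stop sequence matching the prompt template
seed = 42          # hypothetical fixed seed

def clear_history():
    # return an empty list to reset the Chatbot component
    return []

def maintain_cloud():
    # placeholder keep-alive handler: clears the Input and Output textboxes
    return "", ""

# A `cloud` button would also need to be created inside the Blocks layout, e.g.:
# cloud = gr.Button("☁️")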