Update webui.py
webui.py CHANGED
@@ -25,33 +25,8 @@ from huggingface_hub import hf_hub_download
 
 dir = os.getcwd()
 
-
-
-try:
-    global llm
-    llm = Llama(
-        model_path=f"{dir}/models/{path}",
-        n_ctx=n_ctx,
-        n_gpu_layers=n_gpu_layers,
-        n_threads=n_threads,
-        verbose=verbose,
-        f16_kv=f16_kv,
-        logits_all=logits_all,
-        vocab_only=vocab_only,
-        use_mmap=use_mmap,
-        use_mlock=use_mlock,
-        n_batch=n_batch,
-        last_n_tokens_size=last_n_tokens_size,
-        low_vram=low_vram,
-        rope_freq_base=rope_freq_base,
-        rope_freq_scale=rope_freq_scale,
-
-
-
-    )
-    return path
-except:
-    return ""
+
+
 
 
 def list_models(name):
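The removed block was the old model loader: the fully parameterized Llama(...) call, the `global llm`, and the `return path` strongly suggest it formed the body of a function that loaded a user-selected GGML file from models/ and reported the loaded filename (or an empty string on failure) back to the UI. A minimal sketch of that presumed enclosing function; the name load_model and the **kwargs form are assumptions, since the def line sits outside the hunk:

import os
from llama_cpp import Llama

llm = None

def load_model(path, **llama_kwargs):  # hypothetical name and signature
    """Load a GGML file from ./models into the module-global llm."""
    global llm
    try:
        llm = Llama(model_path=f"{os.getcwd()}/models/{path}", **llama_kwargs)
        return path       # success: echo the filename back to the UI
    except Exception:     # the original used a bare except
        return ""         # failure: empty string clears the selection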
@@ -69,6 +44,26 @@ hf_hub_download(
     cache_dir=".cache",
 )
 
+time.sleep(1.5)
+
+llm = Llama(
+    model_path=f"{dir}/models/llama-2-7b-chat.ggmlv3.q2_K.bin",
+    n_ctx=2048,
+    n_gpu_layers=0,
+    n_threads=32,
+    verbose=True,
+    f16_kv=True,
+    logits_all=False,
+    vocab_only=False,
+    use_mmap=True,
+    use_mlock=False,
+    n_batch=512,
+    last_n_tokens_size=64,
+    low_vram=False,
+    rope_freq_base=10000,
+    rope_freq_scale=1,
+)
+
 
 history = []
 
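This is the heart of the commit: instead of loading on demand, the app now instantiates one hardcoded 2-bit-quantized Llama-2-7B-Chat GGML model at import time (with n_gpu_layers=0, inference is CPU-only, which fits a free Space), after a brief time.sleep(1.5) pause following the hf_hub_download(...) call whose closing arguments appear as context. A sketch of what the full download call presumably looks like; the repo_id and filename argument are assumptions, and only cache_dir=".cache" is corroborated by the diff:

from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",     # assumption: a common host for this file
    filename="llama-2-7b-chat.ggmlv3.q2_K.bin",  # assumption: matches the hardcoded model_path
    cache_dir=".cache",                          # shown in the diff context
)

Note that Llama() reads from {dir}/models/, not from .cache, so the original call presumably also redirects or copies the downloaded file; that part is not visible in the hunk.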
@@ -79,7 +74,7 @@ If a question does not make any sense, or is not factually coherent, explain why
 """
 '''
 
-
+
 
 #@spaces.GPU
 def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
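This hunk is a whitespace-only touch next to generate_text, but its context is informative: the signature carries the usual llama.cpp sampling knobs, and the line quoted in the hunk header ("If a question does not make any sense, or is not factually coherent, explain why") is the stock Llama-2 system prompt, so the function presumably renders an [INST]-style chat prompt and streams completions. A sketch under those assumptions; the template and body are not part of the diff, and preset handling is omitted:

def generate_text(message, history, system_prompt, preset, temperature,
                  max_tokens, top_p, top_k, repeat_penalty):
    # Assumed Llama-2 chat template; fold earlier turns into the prompt.
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
    for user, assistant in history:
        prompt += f"{user} [/INST] {assistant} </s><s>[INST] "
    prompt += f"{message} [/INST]"

    # Stream tokens from the module-level llm created above.
    partial = ""
    for chunk in llm(
        prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stream=True,
    ):
        partial += chunk["choices"][0]["text"]
        yield partial  # Gradio re-renders the growing reply on each yield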
@@ -116,7 +111,7 @@ def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
 
 
 
-chatbot = gr.Chatbot(show_label=False, layout="panel", show_copy_button=True, height=500, min_width=180)
+chatbot = gr.Chatbot(show_label=False, autofocus=False, layout="panel", show_copy_button=True, height=500, min_width=180)
 
 with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css") as demo:
     with gr.Row():
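The one functional UI change: autofocus=False is added to the pre-built gr.Chatbot so the page no longer grabs keyboard focus on load. The component is created outside the layout and presumably passed into a chat wrapper inside gr.Blocks; a sketch of one plausible wiring, where the additional inputs are assumptions mirroring generate_text's extra parameters:

import gradio as gr

with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css") as demo:
    gr.ChatInterface(
        generate_text,
        chatbot=chatbot,  # the pre-configured component from this hunk
        additional_inputs=[
            gr.Textbox(label="System prompt"),
            gr.Dropdown(label="Preset"),
            gr.Slider(0.0, 2.0, value=0.8, label="Temperature"),
            gr.Slider(1, 4096, value=512, step=1, label="Max tokens"),
            gr.Slider(0.0, 1.0, value=0.95, label="Top-p"),
            gr.Slider(1, 100, value=40, step=1, label="Top-k"),
            gr.Slider(1.0, 2.0, value=1.1, label="Repeat penalty"),
        ],
    )

demo.launch()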
|