Spaces:
Sleeping
Sleeping
Update webui.py
Browse files
webui.py
CHANGED
@@ -17,7 +17,7 @@ import llama_cpp
|
|
17 |
from llama_cpp import Llama
|
18 |
import random
|
19 |
from huggingface_hub import hf_hub_download
|
20 |
-
import spaces
|
21 |
|
22 |
#from blip.blip_engine import blip_run
|
23 |
|
@@ -25,7 +25,7 @@ import spaces
|
|
25 |
|
26 |
dir = os.getcwd()
|
27 |
|
28 |
-
|
29 |
def load_model(path, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale):
|
30 |
try:
|
31 |
global llm
|
@@ -78,7 +78,7 @@ If a question does not make any sense, or is not factually coherent, explain why
|
|
78 |
"""
|
79 |
'''
|
80 |
|
81 |
-
|
82 |
def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
|
83 |
temp = ""
|
84 |
input_prompt = f"[INST] <<SYS>>\nYou are {preset}. {system_prompt}.\n<</SYS>>\n\n "
|
@@ -217,7 +217,7 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
|
|
217 |
|
218 |
|
219 |
|
220 |
-
model.change(load_model, inputs=[model, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale], outputs=model, api_name=False, queue=True)
|
221 |
reload_model.click(load_model, inputs=[model, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale], outputs=model, api_name=False, queue=True)
|
222 |
|
223 |
|
|
|
17 |
from llama_cpp import Llama
|
18 |
import random
|
19 |
from huggingface_hub import hf_hub_download
|
20 |
+
#import spaces
|
21 |
|
22 |
#from blip.blip_engine import blip_run
|
23 |
|
|
|
25 |
|
26 |
dir = os.getcwd()
|
27 |
|
28 |
+
#@spaces.GPU
|
29 |
def load_model(path, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale):
|
30 |
try:
|
31 |
global llm
|
|
|
78 |
"""
|
79 |
'''
|
80 |
|
81 |
+
#@spaces.GPU
|
82 |
def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
|
83 |
temp = ""
|
84 |
input_prompt = f"[INST] <<SYS>>\nYou are {preset}. {system_prompt}.\n<</SYS>>\n\n "
|
|
|
217 |
|
218 |
|
219 |
|
220 |
+
model.change(load_model, inputs=[model, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale], outputs=model, api_name=False, queue=False)
|
221 |
reload_model.click(load_model, inputs=[model, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale], outputs=model, api_name=False, queue=True)
|
222 |
|
223 |
|