Update webui.py
webui.py CHANGED
@@ -25,33 +25,8 @@ from huggingface_hub import hf_hub_download
 
 dir = os.getcwd()
 
-
-
-try:
-    global llm
-    llm = Llama(
-        model_path=f"{dir}/models/{path}",
-        n_ctx=n_ctx,
-        n_gpu_layers=n_gpu_layers,
-        n_threads=n_threads,
-        verbose=verbose,
-        f16_kv=f16_kv,
-        logits_all=logits_all,
-        vocab_only=vocab_only,
-        use_mmap=use_mmap,
-        use_mlock=use_mlock,
-        n_batch=n_batch,
-        last_n_tokens_size=last_n_tokens_size,
-        low_vram=low_vram,
-        rope_freq_base=rope_freq_base,
-        rope_freq_scale=rope_freq_scale,
-
-
-
-    )
-    return path
-except:
-    return ""
+
+
 
 
 def list_models(name):
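The removed block was the old model loader: the fully parameterized Llama(...) call, the `global llm`, and the `return path` strongly suggest it formed the body of a function that loaded a user-selected GGML file from models/ and reported the loaded filename (or an empty string on failure) back to the UI. A minimal sketch of that presumed enclosing function; the name load_model and the **kwargs form are assumptions, since the def line sits outside the hunk:

import os
from llama_cpp import Llama

llm = None

def load_model(path, **llama_kwargs):  # hypothetical name and signature
    """Load a GGML file from ./models into the module-global llm."""
    global llm
    try:
        llm = Llama(model_path=f"{os.getcwd()}/models/{path}", **llama_kwargs)
        return path       # success: echo the filename back to the UI
    except Exception:     # the original used a bare except
        return ""         # failure: empty string clears the selection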
@@ -69,6 +44,26 @@ hf_hub_download(
     cache_dir=".cache",
 )
 
+time.sleep(1.5)
+
+llm = Llama(
+    model_path=f"{dir}/models/llama-2-7b-chat.ggmlv3.q2_K.bin",
+    n_ctx=2048,
+    n_gpu_layers=0,
+    n_threads=32,
+    verbose=True,
+    f16_kv=True,
+    logits_all=False,
+    vocab_only=False,
+    use_mmap=True,
+    use_mlock=False,
+    n_batch=512,
+    last_n_tokens_size=64,
+    low_vram=False,
+    rope_freq_base=10000,
+    rope_freq_scale=1,
+)
+
 
 history = []
 
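This is the heart of the commit: instead of loading on demand, the app now instantiates one hardcoded 2-bit-quantized Llama-2-7B-Chat GGML model at import time (with n_gpu_layers=0, inference is CPU-only, which fits a free Space), after a brief time.sleep(1.5) pause following the hf_hub_download(...) call whose closing arguments appear as context. A sketch of what the full download call presumably looks like; the repo_id and filename argument are assumptions, and only cache_dir=".cache" is corroborated by the diff:

from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",     # assumption: a common host for this file
    filename="llama-2-7b-chat.ggmlv3.q2_K.bin",  # assumption: matches the hardcoded model_path
    cache_dir=".cache",                          # shown in the diff context
)

Note that Llama() reads from {dir}/models/, not from .cache, so the original call presumably also redirects or copies the downloaded file; that part is not visible in the hunk.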
@@ -79,7 +74,7 @@ If a question does not make any sense, or is not factually coherent, explain why
 """
 '''
 
-
+
 
 #@spaces.GPU
 def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
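This hunk is a whitespace-only touch next to generate_text, but its context is informative: the signature carries the usual llama.cpp sampling knobs, and the line quoted in the hunk header ("If a question does not make any sense, or is not factually coherent, explain why") is the stock Llama-2 system prompt, so the function presumably renders an [INST]-style chat prompt and streams completions. A sketch under those assumptions; the template and body are not part of the diff, and preset handling is omitted:

def generate_text(message, history, system_prompt, preset, temperature,
                  max_tokens, top_p, top_k, repeat_penalty):
    # Assumed Llama-2 chat template; fold earlier turns into the prompt.
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
    for user, assistant in history:
        prompt += f"{user} [/INST] {assistant} </s><s>[INST] "
    prompt += f"{message} [/INST]"

    # Stream tokens from the module-level llm created above.
    partial = ""
    for chunk in llm(
        prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stream=True,
    ):
        partial += chunk["choices"][0]["text"]
        yield partial  # Gradio re-renders the growing reply on each yield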
@@ -116,7 +111,7 @@ def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
 
 
 
-chatbot = gr.Chatbot(show_label=False, layout="panel", show_copy_button=True, height=500, min_width=180)
+chatbot = gr.Chatbot(show_label=False, autofocus=False, layout="panel", show_copy_button=True, height=500, min_width=180)
 
 with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css") as demo:
     with gr.Row():
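The one functional UI change: autofocus=False is added to the pre-built gr.Chatbot so the page no longer grabs keyboard focus on load. The component is created outside the layout and presumably passed into a chat wrapper inside gr.Blocks; a sketch of one plausible wiring, where the additional inputs are assumptions mirroring generate_text's extra parameters:

import gradio as gr

with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css") as demo:
    gr.ChatInterface(
        generate_text,
        chatbot=chatbot,  # the pre-configured component from this hunk
        additional_inputs=[
            gr.Textbox(label="System prompt"),
            gr.Dropdown(label="Preset"),
            gr.Slider(0.0, 2.0, value=0.8, label="Temperature"),
            gr.Slider(1, 4096, value=512, step=1, label="Max tokens"),
            gr.Slider(0.0, 1.0, value=0.95, label="Top-p"),
            gr.Slider(1, 100, value=40, step=1, label="Top-k"),
            gr.Slider(1.0, 2.0, value=1.1, label="Repeat penalty"),
        ],
    )

demo.launch()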
|