ehristoforu committed on
Commit
01037d2
·
verified ·
1 Parent(s): f0b3d79

Update webui.py

Browse files
Files changed (1) hide show
  1. webui.py +24 -29
webui.py CHANGED
@@ -25,33 +25,8 @@ from huggingface_hub import hf_hub_download
25
 
26
  dir = os.getcwd()
27
 
28
- #@spaces.GPU
29
- def load_model(path, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale):
30
- try:
31
- global llm
32
- llm = Llama(
33
- model_path=f"{dir}/models/{path}",
34
- n_ctx=n_ctx,
35
- n_gpu_layers=n_gpu_layers,
36
- n_threads=n_threads,
37
- verbose=verbose,
38
- f16_kv=f16_kv,
39
- logits_all=logits_all,
40
- vocab_only=vocab_only,
41
- use_mmap=use_mmap,
42
- use_mlock=use_mlock,
43
- n_batch=n_batch,
44
- last_n_tokens_size=last_n_tokens_size,
45
- low_vram=low_vram,
46
- rope_freq_base=rope_freq_base,
47
- rope_freq_scale=rope_freq_scale,
48
-
49
-
50
-
51
- )
52
- return path
53
- except:
54
- return ""
55
 
56
 
57
  def list_models(name):
@@ -69,6 +44,26 @@ hf_hub_download(
69
  cache_dir=".cache",
70
  )
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  history = []
74
 
@@ -79,7 +74,7 @@ If a question does not make any sense, or is not factually coherent, explain why
79
  """
80
  '''
81
 
82
- load_model("llama-2-7b-chat.ggmlv3.q2_K.bin", 2048, 0, 32, True, True, False, False, True, False, 512, 64, False, 10000, 1)
83
 
84
  #@spaces.GPU
85
  def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
@@ -116,7 +111,7 @@ def generate_text(message, history, system_prompt, preset, temperature, max_toke
116
 
117
 
118
 
119
- chatbot = gr.Chatbot(show_label=False, layout="panel", show_copy_button=True, height=500, min_width=180, autofocus=False)
120
 
121
  with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css") as demo:
122
  with gr.Row():
 
25
 
26
  dir = os.getcwd()
27
 
28
+
29
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
 
32
  def list_models(name):
 
44
  cache_dir=".cache",
45
  )
46
 
47
+ time.sleep(1.5)
48
+
49
+ llm = Llama(
50
+ model_path=f"{dir}/models/llama-2-7b-chat.ggmlv3.q2_K.bin",
51
+ n_ctx=2048,
52
+ n_gpu_layers=0,
53
+ n_threads=32,
54
+ verbose=True,
55
+ f16_kv=True,
56
+ logits_all=False,
57
+ vocab_only=False,
58
+ use_mmap=True,
59
+ use_mlock=False,
60
+ n_batch=512,
61
+ last_n_tokens_size=64,
62
+ low_vram=False,
63
+ rope_freq_base=10000,
64
+ rope_freq_scale=1,
65
+ )
66
+
67
 
68
  history = []
69
 
 
74
  """
75
  '''
76
 
77
+
78
 
79
  #@spaces.GPU
80
  def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
 
111
 
112
 
113
 
114
+ chatbot = gr.Chatbot(show_label=False, autofocus=False, layout="panel", show_copy_button=True, height=500, min_width=180)
115
 
116
  with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css") as demo:
117
  with gr.Row():