Akjava committed
Commit 3debf7a · 1 Parent(s): 8ce032d
Files changed (2)
  1. README.md +2 -3
  2. app.py +13 -7
README.md CHANGED
@@ -1,6 +1,5 @@
 ---
-title: Llamacpp-madlad400-3b-mt
-emoji: ⚡
+title: Llamacpp-t5-query-reformulation-RL
 colorFrom: indigo
 colorTo: blue
 sdk: gradio
@@ -8,7 +7,7 @@ sdk_version: 5.20.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: madlad400-3b-mt on Llama.cpp CPU
+short_description: t5-query-reformulation-RL on Llama.cpp CPU
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
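For reference, the Space front matter after this commit resolves to roughly the following (a reconstruction from the two hunks above, assuming nothing outside them changed; the emoji line is dropped without a replacement):

    ---
    title: Llamacpp-t5-query-reformulation-RL
    colorFrom: indigo
    colorTo: blue
    sdk: gradio
    sdk_version: 5.20.1
    app_file: app.py
    pinned: false
    license: mit
    short_description: t5-query-reformulation-RL on Llama.cpp CPU
    ---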
app.py CHANGED
@@ -25,16 +25,20 @@ from exception import CustomExceptionHandling
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 os.makedirs("models",exist_ok=True)
 
+
+
 hf_hub_download(
     repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
     filename="t5-query-reformulation-RL-q8_0.gguf",
     local_dir="./models",
 )
 
+
+
 # Set the title and description
 title = "t5-query-reformulation-RL Llama.cpp"
 description = """
-I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
+I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python server support t5
 
 [Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
@@ -77,16 +81,18 @@ def respond(
     Returns:
         str: The response to the message.
     """
+
     try:
         global llama
         if llama == None:
-            llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf",flash_attn=False,
+            model_id = "t5-query-reformulation-RL-q8_0.gguf"
+            llama = Llama(f"models/{model_id}",flash_attn=False,
                 n_gpu_layers=0,
-                n_batch=32,
+                #n_batch=16,#batch sometime make error
                 n_ctx=512,
                 n_threads=2,
                 n_threads_batch=2)
-
+
         tokens = llama.tokenize(f"{message}".encode("utf-8"))
         llama.encode(tokens)
         tokens = [llama.decoder_start_token()]
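The hunk above only sets up the encoder pass and seeds the decoder; the generation loop itself is outside the diff. A minimal sketch of the full encode/decode flow, assuming the fairydreaming t5 branch API mentioned in the description (encode() and decoder_start_token() come from that branch; eval(), sample(), token_eos() and detokenize() are standard llama-cpp-python calls; the prompt and temperature are illustrative only):

    from llama_cpp import Llama

    llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf", flash_attn=False,
                  n_gpu_layers=0, n_ctx=512, n_threads=2, n_threads_batch=2)

    message = "What is the capital of France?"        # illustrative prompt
    tokens = llama.tokenize(message.encode("utf-8"))  # tokenize the encoder input
    llama.encode(tokens)                              # run the T5 encoder (t5 branch)
    tokens = [llama.decoder_start_token()]            # seed the decoder (t5 branch)

    pieces = []
    while True:
        llama.eval(tokens)                            # one decoder step
        token = llama.sample(temp=0.4)                # sample the next token
        if token == llama.token_eos():                # stop at end-of-sequence
            break
        pieces.append(llama.detokenize([token]).decode("utf-8", errors="ignore"))
        tokens = [token]

    print("".join(pieces))                            # the reformulated query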
@@ -114,7 +120,7 @@ def respond(
 # Create a chat interface
 demo = gr.ChatInterface(
     respond,
-    examples=[["What is the capital of France?"], ["Tell me something about artificial intelligence."], ["What is gravity?"]],
+    examples=[["What is the capital of France?"], ["What real child was raised by wolves?"], ["What is gravity?"]],
     additional_inputs_accordion=gr.Accordion(
         label="⚙️ Parameters", open=False, render=False
     ),
@@ -140,12 +146,12 @@ demo = gr.ChatInterface(
             value=1024,
             step=1,
             label="Max Tokens",
-            info="Maximum length of response (higher = longer replies)",
+            info="Maximum length of response (higher = longer replies)",visible=False
         ),
         gr.Slider(
             minimum=0.1,
             maximum=2.0,
-            value=0.7,
+            value=0.4,
             step=0.1,
             label="Temperature",
             info="Creativity level (higher = more creative, lower = more focused)",
 
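Putting the UI hunks together: the sliders above are wired into gr.ChatInterface as additional_inputs, whose values Gradio passes to respond() after the message and history. A rough, self-contained sketch (the respond() stub, its signature, and the slider ranges are assumptions for illustration; only the values shown in the hunks come from the commit):

    import gradio as gr

    def respond(message, history, max_tokens, temperature):
        # Stub standing in for the real respond(), which generates tokens with
        # the T5 model as in the hunks above; this signature is assumed.
        return f"(reformulated) {message}"

    demo = gr.ChatInterface(
        respond,
        examples=[["What is the capital of France?"], ["What real child was raised by wolves?"], ["What is gravity?"]],
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Slider(minimum=64, maximum=2048, value=1024, step=1, label="Max Tokens",
                      info="Maximum length of response (higher = longer replies)", visible=False),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.4, step=0.1, label="Temperature",
                      info="Creativity level (higher = more creative, lower = more focused)"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()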