pandora-s committed
Commit 5749a76 · verified · 1 parent: 52c2787

Update app.py

Files changed (1): app.py (+2 -8)
app.py CHANGED
@@ -33,7 +33,6 @@ import requests
 
 from huggingface_hub import snapshot_download
 
-default_temperature = 0.15
 default_max_context = 16384
 default_max_output = 512
 
@@ -53,11 +52,9 @@ for model in available_models:
     dirs.update({model: snapshot_download(repo_id="turboderp/pixtral-12b-exl2", revision=model)})
 
 @spaces.GPU(duration=45)
-def run_inference(message, history, model_picked, temperature, context_size, max_output):
+def run_inference(message, history, model_picked, context_size, max_output):
     if not model_picked:
         model_picked = default_bpw
-    if not temperature:
-        temperature = default_temperature
     if not context_size:
         context_size = default_max_context
     if not max_output:
@@ -128,7 +125,6 @@ def run_inference(message, history, model_picked, temperature, context_size, max
     output = generator.generate(
         prompt = prompt,
         max_new_tokens = max_output,
-        temperature = temperature,
         add_bos = True,
         encode_special_tokens = True,
         decode_special_tokens = True,
@@ -153,7 +149,6 @@ The current default settings are:
 - Model Quant: 4.0bpw
 - Context Size: 16k tokens
 - Max Output: 512 tokens
-- Temperature: 0.15
 
 You can select other quants and experiment!
 
@@ -166,8 +161,7 @@ examples = [
 ]
 
 drop = gr.Dropdown(available_models, label="EXL2 Quant", value=default_bpw)
-temperature_gradio = gr.Slider(minimum = 0, maximum = 1, label="Temperature", value=default_temperature, step = 0.05)
 context_size_gradio = gr.Slider(minimum = 256, maximum = 32768, label="Context Size", value=default_max_context, step = 1)
 output_length_gradio = gr.Slider(minimum = 1, maximum = 4096, label="Max Ouput Length", value=default_max_output, step = 1)
-demo = gr.ChatInterface(fn=run_inference, examples = examples, title="Pixtral 12B EXL2", multimodal=True, description=description, additional_inputs = [drop, temperature_gradio, context_size_gradio, output_length_gradio])
+demo = gr.ChatInterface(fn=run_inference, examples = examples, title="Pixtral 12B EXL2", multimodal=True, description=description, additional_inputs = [drop, context_size_gradio, output_length_gradio])
 demo.queue().launch()
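
Note on the UI change: gr.ChatInterface passes the current value of each component in additional_inputs to fn as extra positional arguments after message and history, which is why the temperature slider and the matching temperature parameter have to be removed together; otherwise every later argument shifts by one slot (context_size would arrive as temperature, and so on). A minimal runnable sketch of that mapping, with a hypothetical stand-in handler body and a placeholder quant list in place of available_models:

import gradio as gr

# Stand-in handler: same positional contract as run_inference after this
# commit -- (message, history) first, then one argument per additional input.
def run_inference(message, history, model_picked, context_size, max_output):
    return f"quant={model_picked}, ctx={context_size}, max_out={max_output}"

drop = gr.Dropdown(["4.0bpw", "5.0bpw"], label="EXL2 Quant", value="4.0bpw")
context_size_gradio = gr.Slider(minimum=256, maximum=32768, value=16384, step=1, label="Context Size")
output_length_gradio = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max Output Length")

# Each dropdown/slider value arrives in run_inference in list order.
demo = gr.ChatInterface(
    fn=run_inference,
    multimodal=True,
    additional_inputs=[drop, context_size_gradio, output_length_gradio],
)
demo.queue().launch()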
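Note on the generate() change: with temperature = temperature removed from generator.generate(...), sampling falls back to the generator's default settings. If a fixed temperature were still wanted, exllamav2 exposes sampling parameters through ExLlamaV2Sampler.Settings rather than a bare keyword; the following is a hedged sketch of that route, not part of this commit, and it assumes generator is the ExLlamaV2DynamicGenerator built elsewhere in app.py, with prompt and max_output as in the diff:

from exllamav2.generator import ExLlamaV2Sampler

# Assumption: generator, prompt and max_output are defined as in app.py.
settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.15  # the old default this commit removes

output = generator.generate(
    prompt = prompt,
    max_new_tokens = max_output,
    gen_settings = settings,  # sampling config travels here, not as a kwarg
    add_bos = True,
    encode_special_tokens = True,
    decode_special_tokens = True,
)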
 
33
 
34
  from huggingface_hub import snapshot_download
35
 
 
36
  default_max_context = 16384
37
  default_max_output = 512
38
 
 
52
  dirs.update({model: snapshot_download(repo_id="turboderp/pixtral-12b-exl2", revision=model)})
53
 
54
  @spaces.GPU(duration=45)
55
+ def run_inference(message, history, model_picked, context_size, max_output):
56
  if not model_picked:
57
  model_picked = default_bpw
 
 
58
  if not context_size:
59
  context_size = default_max_context
60
  if not max_output:
 
125
  output = generator.generate(
126
  prompt = prompt,
127
  max_new_tokens = max_output,
 
128
  add_bos = True,
129
  encode_special_tokens = True,
130
  decode_special_tokens = True,
 
149
  - Model Quant: 4.0bpw
150
  - Context Size: 16k tokens
151
  - Max Output: 512 tokens
 
152
 
153
  You can select other quants and experiment!
154
 
 
161
  ]
162
 
163
  drop = gr.Dropdown(available_models, label="EXL2 Quant", value=default_bpw)
 
164
  context_size_gradio = gr.Slider(minimum = 256, maximum = 32768, label="Context Size", value=default_max_context, step = 1)
165
  output_length_gradio = gr.Slider(minimum = 1, maximum = 4096, label="Max Ouput Length", value=default_max_output, step = 1)
166
+ demo = gr.ChatInterface(fn=run_inference, examples = examples, title="Pixtral 12B EXL2", multimodal=True, description=description, additional_inputs = [drop, context_size_gradio, output_length_gradio])
167
  demo.queue().launch()