Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -33,7 +33,6 @@ import requests
 
 from huggingface_hub import snapshot_download
 
-default_temperature = 0.15
 default_max_context = 16384
 default_max_output = 512
 
@@ -53,11 +52,9 @@ for model in available_models:
     dirs.update({model: snapshot_download(repo_id="turboderp/pixtral-12b-exl2", revision=model)})
 
 @spaces.GPU(duration=45)
-def run_inference(message, history, model_picked, temperature, context_size, max_output):
+def run_inference(message, history, model_picked, context_size, max_output):
     if not model_picked:
         model_picked = default_bpw
-    if not temperature:
-        temperature = default_temperature
     if not context_size:
         context_size = default_max_context
     if not max_output:
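A note on the download loop at the top of this hunk: each EXL2 quant of turboderp/pixtral-12b-exl2 lives on its own branch of the Hub repo, so passing the quant name as the revision fetches the matching weights. A minimal sketch of the same pattern, with a hypothetical branch list (the real app builds available_models elsewhere):

from huggingface_hub import snapshot_download

# Hypothetical branch names; the app defines available_models elsewhere.
available_models = ["3.0bpw", "4.0bpw", "5.0bpw"]

dirs = {}
for model in available_models:
    # Each quant sits on its own branch ("revision") of the same repo;
    # snapshot_download caches one local snapshot per revision.
    dirs[model] = snapshot_download(repo_id="turboderp/pixtral-12b-exl2", revision=model)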
@@ -128,7 +125,6 @@ def run_inference(message, history, model_picked, temperature, context_size, max_output):
     output = generator.generate(
         prompt = prompt,
         max_new_tokens = max_output,
-        temperature = temperature,
         add_bos = True,
         encode_special_tokens = True,
         decode_special_tokens = True,
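With the bare temperature keyword gone from generate(), sampling presumably falls back to the generator's built-in defaults. If temperature control were reintroduced, exllamav2 normally takes sampling parameters bundled in an ExLlamaV2Sampler.Settings object rather than as a loose keyword. A hedged sketch, reusing generator, prompt, and max_output from the surrounding app code and assuming the dynamic-generator API:

from exllamav2.generator import ExLlamaV2Sampler

# Assumption: generator, prompt, and max_output are the objects defined in
# run_inference above; this is a sketch, not the app's actual code.
settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.15  # the old default this commit removes

output = generator.generate(
    prompt = prompt,
    max_new_tokens = max_output,
    gen_settings = settings,  # sampler settings bundle (assumed parameter name)
    add_bos = True,
    encode_special_tokens = True,
    decode_special_tokens = True,
)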
@@ -153,7 +149,6 @@ The current default settings are:
 - Model Quant: 4.0bpw
 - Context Size: 16k tokens
 - Max Output: 512 tokens
-- Temperature: 0.15
 
 You can select other quants and experiment!
 
@@ -166,8 +161,7 @@ examples = [
 ]
 
 drop = gr.Dropdown(available_models, label="EXL2 Quant", value=default_bpw)
-temperature_gradio = gr.Slider(minimum = 0, maximum = 1, label="Temperature", value=default_temperature, step = 0.05)
 context_size_gradio = gr.Slider(minimum = 256, maximum = 32768, label="Context Size", value=default_max_context, step = 1)
 output_length_gradio = gr.Slider(minimum = 1, maximum = 4096, label="Max Ouput Length", value=default_max_output, step = 1)
-demo = gr.ChatInterface(fn=run_inference, examples = examples, title="Pixtral 12B EXL2", multimodal=True, description=description, additional_inputs = [drop, temperature_gradio, context_size_gradio, output_length_gradio])
+demo = gr.ChatInterface(fn=run_inference, examples = examples, title="Pixtral 12B EXL2", multimodal=True, description=description, additional_inputs = [drop, context_size_gradio, output_length_gradio])
 demo.queue().launch()
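Why the function signature and the additional_inputs list shrink in lockstep: gr.ChatInterface calls fn as fn(message, history, *values_of_additional_inputs), and with multimodal=True the message argument arrives as a dict with "text" and "files" keys. A runnable toy sketch of that contract, with an echo body standing in for the app's real inference:

import gradio as gr

def run_inference(message, history, model_picked, context_size, max_output):
    # With multimodal=True, message is {"text": str, "files": [file paths]}.
    return f"{model_picked} / ctx {context_size} / max {max_output}: {message['text']}"

drop = gr.Dropdown(["4.0bpw", "5.0bpw"], label="EXL2 Quant", value="4.0bpw")
context_size_gradio = gr.Slider(minimum=256, maximum=32768, label="Context Size", value=16384, step=1)
output_length_gradio = gr.Slider(minimum=1, maximum=4096, label="Max Output Length", value=512, step=1)

demo = gr.ChatInterface(
    fn=run_inference,
    multimodal=True,
    additional_inputs=[drop, context_size_gradio, output_length_gradio],
)
demo.queue().launch()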