Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -113,7 +113,7 @@ def run_inference(message, history, model_picked):
|
|
| 113 |
print(prompt)
|
| 114 |
|
| 115 |
# Generating Response
|
| 116 |
-
|
| 117 |
prompt = prompt,
|
| 118 |
max_new_tokens = 1024,
|
| 119 |
temperature = 0.15,
|
|
@@ -122,15 +122,11 @@ def run_inference(message, history, model_picked):
|
|
| 122 |
decode_special_tokens = True,
|
| 123 |
stop_conditions = [tokenizer.eos_token_id],
|
| 124 |
gen_settings = ExLlamaV2Sampler.Settings.greedy(),
|
| 125 |
-
embeddings = images_embeddings
|
| 126 |
-
|
| 127 |
-
)
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
else:
|
| 131 |
-
result = out
|
| 132 |
-
print(result)
|
| 133 |
-
yield result
|
| 134 |
|
| 135 |
description="""
|
| 136 |
A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
|
|
|
|
| 113 |
print(prompt)
|
| 114 |
|
| 115 |
# Generating Response
|
| 116 |
+
output = generator.generate(
|
| 117 |
prompt = prompt,
|
| 118 |
max_new_tokens = 1024,
|
| 119 |
temperature = 0.15,
|
|
|
|
| 122 |
decode_special_tokens = True,
|
| 123 |
stop_conditions = [tokenizer.eos_token_id],
|
| 124 |
gen_settings = ExLlamaV2Sampler.Settings.greedy(),
|
| 125 |
+
embeddings = images_embeddings
|
| 126 |
+
)
|
| 127 |
+
result = out.split("[/INST]")[-1]
|
| 128 |
+
print(result)
|
| 129 |
+
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
description="""
|
| 132 |
A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
|