Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,8 @@ from llama_cpp import Llama
|
|
6 |
client = InferenceClient()
|
7 |
|
8 |
llm = Llama.from_pretrained(
|
9 |
-
repo_id=
|
10 |
-
filename="
|
11 |
)
|
12 |
|
13 |
# Fixed system message
|
@@ -48,17 +48,18 @@ def respond(
|
|
48 |
demo = gr.ChatInterface(
|
49 |
respond,
|
50 |
additional_inputs=[
|
51 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="
|
52 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="
|
53 |
gr.Slider(
|
54 |
minimum=0.1,
|
55 |
maximum=1.0,
|
56 |
value=0.95,
|
57 |
step=0.05,
|
58 |
-
label="
|
59 |
),
|
60 |
],
|
61 |
)
|
62 |
|
63 |
if __name__ == "__main__":
|
64 |
demo.launch()
|
|
|
|
6 |
client = InferenceClient()
|
7 |
|
8 |
llm = Llama.from_pretrained(
|
9 |
+
repo_id="bartowski/Reasoning-Llama-1b-v0.1-GGUF",
|
10 |
+
filename="Reasoning-Llama-1b-v0.1-f16.gguf",
|
11 |
)
|
12 |
|
13 |
# Fixed system message
|
|
|
48 |
demo = gr.ChatInterface(
|
49 |
respond,
|
50 |
additional_inputs=[
|
51 |
+
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Maximum Response Length"),
|
52 |
+
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Creativity"),
|
53 |
gr.Slider(
|
54 |
minimum=0.1,
|
55 |
maximum=1.0,
|
56 |
value=0.95,
|
57 |
step=0.05,
|
58 |
+
label="Neuron Firing Rate",
|
59 |
),
|
60 |
],
|
61 |
)
|
62 |
|
63 |
if __name__ == "__main__":
|
64 |
demo.launch()
|
65 |
+
|