Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,12 +9,13 @@ llm = Llama.from_pretrained(
|
|
9 |
repo_id="prithivMLmods/SmolLM2-135M-Instruct-GGUF",
|
10 |
filename="SmolLM2-135M-Instruct.Q5_K_M.gguf",
|
11 |
verbose=False,
|
|
|
12 |
)
|
13 |
|
14 |
def generate(
|
15 |
user_prompt: str,
|
16 |
system_prompt: str = "You are a helpful assistant.",
|
17 |
-
max_tokens: int =
|
18 |
temperature: float = 0.2,
|
19 |
top_p: float = 0.95,
|
20 |
top_k: int = 40,
|
@@ -44,7 +45,7 @@ with gr.Blocks() as demo:
|
|
44 |
|
45 |
with gr.Accordion("kwargs"):
|
46 |
with gr.Row(variant="panel"):
|
47 |
-
max_tokens = gr.Number(label="Max tokens", value=
|
48 |
temperature = gr.Number(label="Temperature", value=0.2)
|
49 |
top_p = gr.Number(label="Top p", value=0.95)
|
50 |
top_k = gr.Number(label="Top k", value=40)
|
|
|
9 |
repo_id="prithivMLmods/SmolLM2-135M-Instruct-GGUF",
|
10 |
filename="SmolLM2-135M-Instruct.Q5_K_M.gguf",
|
11 |
verbose=False,
|
12 |
+
n_ctx=7000
|
13 |
)
|
14 |
|
15 |
def generate(
|
16 |
user_prompt: str,
|
17 |
system_prompt: str = "You are a helpful assistant.",
|
18 |
+
max_tokens: int = 8192-7000,
|
19 |
temperature: float = 0.2,
|
20 |
top_p: float = 0.95,
|
21 |
top_k: int = 40,
|
|
|
45 |
|
46 |
with gr.Accordion("kwargs"):
|
47 |
with gr.Row(variant="panel"):
|
48 |
+
max_tokens = gr.Number(label="Max tokens", value=1100)
|
49 |
temperature = gr.Number(label="Temperature", value=0.2)
|
50 |
top_p = gr.Number(label="Top p", value=0.95)
|
51 |
top_k = gr.Number(label="Top k", value=40)
|