Spaces:
Sleeping
Sleeping
Commit
·
358cd20
1
Parent(s):
ff938c3
Fix: Move the n_ctx=4096 parameter from the per-call inference arguments into the Llama() constructor, where the context size is configured at model load time.
Browse files
utils.py
CHANGED
@@ -35,7 +35,7 @@ else:
|
|
35 |
|
36 |
if in_memory_llm is None and USE_HTTP_SERVER is False:
|
37 |
print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
|
38 |
-
in_memory_llm = Llama(model_path=LLM_MODEL_PATH)
|
39 |
|
40 |
def llm_streaming(
|
41 |
prompt: str, pydantic_model_class, return_pydantic_object=False
|
@@ -117,7 +117,6 @@ def llm_stream_sans_network(
|
|
117 |
|
118 |
stream = in_memory_llm(
|
119 |
prompt,
|
120 |
-
n_ctx=4096,
|
121 |
max_tokens=1000,
|
122 |
temperature=0.7,
|
123 |
grammar=grammar,
|
|
|
35 |
|
36 |
if in_memory_llm is None and USE_HTTP_SERVER is False:
|
37 |
print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
|
38 |
+
in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096)
|
39 |
|
40 |
def llm_streaming(
|
41 |
prompt: str, pydantic_model_class, return_pydantic_object=False
|
|
|
117 |
|
118 |
stream = in_memory_llm(
|
119 |
prompt,
|
|
|
120 |
max_tokens=1000,
|
121 |
temperature=0.7,
|
122 |
grammar=grammar,
|