lukestanley committed
Commit 358cd20 · 1 Parent(s): ff938c3

Fix: Move n_ctx parameter to model setup!

Files changed (1):
  1. utils.py +1 -2
utils.py CHANGED
@@ -35,7 +35,7 @@ else:
 
 if in_memory_llm is None and USE_HTTP_SERVER is False:
     print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
-    in_memory_llm = Llama(model_path=LLM_MODEL_PATH)
+    in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096)
 
 def llm_streaming(
     prompt: str, pydantic_model_class, return_pydantic_object=False
@@ -117,7 +117,6 @@ def llm_stream_sans_network(
 
     stream = in_memory_llm(
         prompt,
-        n_ctx=4096,
         max_tokens=1000,
         temperature=0.7,
         grammar=grammar,
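
Context for the change: in llama-cpp-python the context window (n_ctx) is fixed when the model is loaded, so it is a constructor argument on Llama rather than a per-call option; passing it to the completion call has no effect or is rejected, depending on the library version. Below is a minimal sketch of the corrected usage, assuming llama-cpp-python; the model path, prompt, and GBNF grammar are placeholder values for illustration, not taken from this repository.

# Sketch: n_ctx belongs on the Llama constructor; sampling options stay on the call.
from llama_cpp import Llama, LlamaGrammar

# Context window is set once, at model load time (placeholder model path).
llm = Llama(model_path="model.gguf", n_ctx=4096)

# Per-call parameters: max_tokens, temperature, grammar, streaming.
grammar = LlamaGrammar.from_string('root ::= "yes" | "no"')
stream = llm(
    "Answer yes or no: is the sky blue?\n",
    max_tokens=1000,
    temperature=0.7,
    grammar=grammar,
    stream=True,
)
for chunk in stream:
    print(chunk["choices"][0]["text"], end="")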