sugiv committed on
Commit
5fce82e
·
1 Parent(s): dd29eef

Change of settings again for High CPU

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -46,8 +46,8 @@ REPO_ID = "sugiv/leetmonkey-peft-gguf"
46
  # Load the model
47
  model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME, cache_dir="./models")
48
  #llm = Llama(model_path=model_path, n_ctx=2048, n_threads=16, n_gpu_layers=-1, verbose=False, mlock=True) ## TPU
49
- #llm = Llama(model_path=model_path, n_ctx=1024, n_threads=2, n_gpu_layers=0, verbose=False, mlock=False) ## CPU only
50
- llm = Llama(model_path=model_path, n_ctx=1024, n_threads=4, n_gpu_layers=-1, verbose=False, mlock=True) ## Nvidia
51
  logger.info("8-bit model loaded successfully")
52
 
53
  # User data storage
 
46
  # Load the model
47
  model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME, cache_dir="./models")
48
  #llm = Llama(model_path=model_path, n_ctx=2048, n_threads=16, n_gpu_layers=-1, verbose=False, mlock=True) ## TPU
49
+ llm = Llama(model_path=model_path, n_ctx=1024, n_threads=8, n_gpu_layers=0, verbose=False, mlock=True) ## CPU only
50
+ #llm = Llama(model_path=model_path, n_ctx=1024, n_threads=4, n_gpu_layers=-1, verbose=False, mlock=True) ## Nvidia
51
  logger.info("8-bit model loaded successfully")
52
 
53
  # User data storage