sugiv committed on
Commit 90f1673 · 1 Parent(s): 84f728b

Change of settings again for TPU

Files changed (1)
app.py +2 -2
app.py CHANGED
@@ -44,7 +44,7 @@ REPO_ID = "sugiv/leetmonkey-peft-gguf"
 
 # Load the model
 model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME, cache_dir="./models")
-llm = Llama(model_path=model_path, n_ctx=1500, n_threads=16, n_gpu_layers=-1, verbose=False, mlock=True) ## TPU
+llm = Llama(model_path=model_path, n_ctx=1024, n_threads=8, n_gpu_layers=-1, verbose=False, mlock=True) ## TPU
 #llm = Llama(model_path=model_path, n_ctx=1024, n_threads=16, n_gpu_layers=0, verbose=False, mlock=False) ## CPU only
 #llm = Llama(model_path=model_path, n_ctx=1024, n_threads=16, n_gpu_layers=10, verbose=False, mlock=False) ## Nvidia
 logger.info("8-bit model loaded successfully")
@@ -54,7 +54,7 @@ token_to_problem_solution = {}
 
 # Generation parameters
 generation_kwargs = {
-    "max_tokens": 1024,
+    "max_tokens": 512,
     "stop": ["```", "### Instruction:", "### Response:"],
     "echo": False,
     "temperature": 0.05,
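For reference, a minimal sketch (not part of the commit) of how the three backend-specific Llama configurations that app.py toggles by commenting lines in and out could instead be selected from a single table. BACKEND_SETTINGS, build_llm, and the MODEL_NAME placeholder are hypothetical names introduced here; Llama, hf_hub_download, REPO_ID, and all parameter values come from the diff above.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

REPO_ID = "sugiv/leetmonkey-peft-gguf"
MODEL_NAME = "model.gguf"  # hypothetical placeholder; app.py defines the real filename elsewhere

# Per-backend settings mirroring the three variants in the diff.
# Note: app.py passes mlock=...; llama-cpp-python's documented flag is use_mlock.
BACKEND_SETTINGS = {
    "tpu":    dict(n_ctx=1024, n_threads=8,  n_gpu_layers=-1, mlock=True),
    "cpu":    dict(n_ctx=1024, n_threads=16, n_gpu_layers=0,  mlock=False),
    "nvidia": dict(n_ctx=1024, n_threads=16, n_gpu_layers=10, mlock=False),
}

# Generation parameters as of this commit (max_tokens halved from 1024 to 512)
generation_kwargs = {
    "max_tokens": 512,
    "stop": ["```", "### Instruction:", "### Response:"],
    "echo": False,
    "temperature": 0.05,
}

def build_llm(backend: str = "tpu") -> Llama:
    # Download the GGUF weights once, then instantiate with the chosen settings
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME, cache_dir="./models")
    return Llama(model_path=model_path, verbose=False, **BACKEND_SETTINGS[backend])

# Usage: llm = build_llm("tpu"); output = llm(prompt, **generation_kwargs)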