sugiv committed
Commit 5818248 · 1 Parent(s): 3de40eb

Enabling share and CORS support
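The commit title refers to share and CORS support, but the visible hunks below only cover logging setup, model-loading flags, and generation parameters. For orientation, a CORS setup in a FastAPI app conventionally looks like the minimal sketch below; the actual configuration in app.py is not shown in this diff, and the wide-open allow_origins=["*"] is purely a placeholder. ("Share" usually refers to exposing a public link, e.g. Gradio's launch(share=True).)

```python
# Assumed sketch only: a typical FastAPI CORS middleware setup of the kind the
# commit title refers to. The real settings in app.py are not visible in this diff.
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],   # placeholder; restrict to trusted origins in production
    allow_methods=["*"],
    allow_headers=["*"],
)
```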

Files changed (1)
  app.py  +6 -3
app.py CHANGED

@@ -15,6 +15,10 @@ from datasets import load_dataset
 from fastapi.responses import StreamingResponse
 import random
 
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 # Load the dataset (you might want to do this once at the start of your script)
 dataset = load_dataset("sugiv/leetmonkey_python_dataset")
 train_dataset = dataset["train"]
@@ -35,7 +39,7 @@ REPO_ID = "sugiv/leetmonkey-peft-gguf"
 
 # Load the model
 model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME, cache_dir="./models")
-llm = Llama(model_path=model_path, n_ctx=1024, n_threads=8, n_gpu_layers=-1)
+llm = Llama(model_path=model_path, n_ctx=1024, n_threads=8, n_gpu_layers=-1, verbose=False, mlock=True)
 logger.info("8-bit model loaded successfully")
 
 # Generation parameters
@@ -46,8 +50,7 @@ generation_kwargs = {
     "temperature": 0.05,
     "top_k": 10,
     "top_p": 0.9,
-    "repeat_penalty": 1.1,
-    "verbose": False
+    "repeat_penalty": 1.1
 }
 
 def verify_token(token: str) -> bool:
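Taken together, the changes set up module-level logging, add verbose=False and mlock=True to the Llama constructor (mlock pins the model in RAM), and drop "verbose" from generation_kwargs, where it is not a sampling parameter; repeat_penalty stays. A minimal sketch of how llm and generation_kwargs are typically wired into a FastAPI streaming endpoint follows; the route and handler names are assumptions, not part of this commit.

```python
# Illustrative sketch (assumed, not part of this commit): streaming tokens from
# the loaded GGUF model through FastAPI. `llm` and `generation_kwargs` are the
# objects defined earlier in app.py; the /generate route is hypothetical.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

@app.get("/generate")
async def generate(prompt: str):
    def token_stream():
        # With stream=True, llama-cpp-python yields completion chunks incrementally.
        for chunk in llm(prompt, stream=True, **generation_kwargs):
            yield chunk["choices"][0]["text"]
    return StreamingResponse(token_stream(), media_type="text/plain")
```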