Manel committed
Commit 8c0109f · verified · 1 Parent(s): 38348b6

update app.py

Files changed (1)
  1. app.py +8 -6
app.py CHANGED
@@ -23,8 +23,9 @@ def load_model(model_name):
     if model_name=='llama':
         from langchain.llms import CTransformers
 
-        model = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML", model_file='llama-2-7b-chat.ggmlv3.q2_K.bin',
-                              model_type='llama', gpu_layers=0, config={"context_length":2048,})
+        model = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML",
+                              model_file='llama-2-7b-chat.ggmlv3.q4_0.bin',
+                              model_type='llama', gpu_layers=0)  # config={"context_length":2048,})
         tokenizer = None
 
     elif model_name=='mistral':
@@ -85,10 +86,11 @@ def wrap_model(model, tokenizer):
         model=model,
         tokenizer=tokenizer,
         task="text-generation",
-        temperature=0.2,
-        repetition_penalty=1.1,
-        #return_full_text=True,
-        max_new_tokens=1000,
+        temperature=0.5,
+        repetition_penalty=2.1,
+        no_repeat_ngram_size=3,
+        max_new_tokens=400,
+        num_beams=2,
         pad_token_id=2,
         do_sample=True)
     HF_pipeline = HuggingFacePipeline(pipeline=text_generation_pipeline)
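
For reference, the rewritten 'llama' branch amounts to the call below; a minimal sketch assuming a legacy (pre-0.1) langchain with ctransformers installed, where LLM objects are directly callable. The prompt string is illustrative, not part of the commit. The swap from q2_K to q4_0 picks a larger but markedly less lossy quantization, and commenting out the config dict means ctransformers falls back to its default context length rather than the previous 2048 override.

from langchain.llms import CTransformers

# As committed: CPU-only load (gpu_layers=0) of the 4-bit GGML file.
llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML",
                    model_file='llama-2-7b-chat.ggmlv3.q4_0.bin',
                    model_type='llama', gpu_layers=0)

# Legacy langchain LLMs are callable: plain string in, completion out.
print(llm("List three trade-offs of 4-bit quantization."))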
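
The second hunk retunes decoding in wrap_model. Two of the new values are worth flagging: repetition_penalty=2.1 sits far above the 1.1-1.3 range typically used and can visibly distort phrasing, and num_beams=2 together with do_sample=True selects beam-multinomial sampling rather than plain sampling; no_repeat_ngram_size=3 additionally hard-bans any repeated trigram. A standalone sketch of the resulting pipeline follows; the checkpoint name is a placeholder assumption, since app.py passes in whatever load_model returned.

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

name = "mistralai/Mistral-7B-Instruct-v0.1"  # placeholder assumption
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

# Generation kwargs given at construction are forwarded to generate().
text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.5,          # up from 0.2: more varied sampling
    repetition_penalty=2.1,   # up from 1.1: very aggressive penalty
    no_repeat_ngram_size=3,   # new: no trigram may repeat in the output
    max_new_tokens=400,       # down from 1000: shorter completions
    num_beams=2,              # new: beam search combined with sampling
    pad_token_id=2,           # unchanged: id 2 is </s> for Llama/Mistral
    do_sample=True)
HF_pipeline = HuggingFacePipeline(pipeline=text_generation_pipeline)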