update app.py
app.py CHANGED
@@ -23,8 +23,9 @@ def load_model(model_name):
     if model_name=='llama':
         from langchain.llms import CTransformers

-        model = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML",
-
+        model = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML",
+                              model_file='llama-2-7b-chat.ggmlv3.q4_0.bin',
+                              model_type='llama', gpu_layers=0)  # config={"context_length": 2048}
         tokenizer = None

     elif model_name=='mistral':
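For context, here is a minimal standalone sketch of what the new `llama` branch does: it pulls 4-bit GGML weights from the Hub and runs CPU-only inference through langchain's `CTransformers` wrapper. This assumes `langchain` and `ctransformers` are installed; the prompt string is illustrative, not from app.py.

```python
# Standalone sketch of the new 'llama' branch of load_model().
# Requires: pip install langchain ctransformers
from langchain.llms import CTransformers

llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGML",         # Hub repo holding GGML weights
    model_file="llama-2-7b-chat.ggmlv3.q4_0.bin",  # q4_0 quantized file, as in the diff
    model_type="llama",                            # backend architecture
    gpu_layers=0,                                  # 0 = pure CPU inference
)

# Illustrative call; in app.py the model object is used downstream instead.
print(llm("Explain quantization in one sentence."))
```

Setting `gpu_layers=0` keeps inference on the CPU, which matches running on a Space without a GPU; the commented-out `config={"context_length": 2048}` in the diff would raise the context window if re-enabled.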
@@ -85,10 +86,11 @@ def wrap_model(model, tokenizer):
         model=model,
         tokenizer=tokenizer,
         task="text-generation",
-        temperature=0.
-        repetition_penalty=
-
-        max_new_tokens=
+        temperature=0.5,
+        repetition_penalty=2.1,
+        no_repeat_ngram_size=3,
+        max_new_tokens=400,
+        num_beams=2,
         pad_token_id=2,
         do_sample=True)
     HF_pipeline = HuggingFacePipeline(pipeline=text_generation_pipeline)
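For reference, a self-contained sketch of `wrap_model` with the updated generation settings. The checkpoint name is an assumption for illustration only, since the diff does not show which model/tokenizer pair gets passed in.

```python
# Self-contained sketch of wrap_model() with the new generation settings.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

model_id = "mistralai/Mistral-7B-Instruct-v0.1"  # assumed; the diff doesn't name it
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.5,          # moderate sampling randomness
    repetition_penalty=2.1,   # penalize already-generated tokens
    no_repeat_ngram_size=3,   # block any repeated 3-gram
    max_new_tokens=400,       # cap on generated length
    num_beams=2,              # beam-sample decoding with 2 beams
    pad_token_id=2,           # 2 is the </s> id for Llama/Mistral tokenizers
    do_sample=True,
)
HF_pipeline = HuggingFacePipeline(pipeline=text_generation_pipeline)
```

Note that `num_beams=2` combined with `do_sample=True` gives beam-sample decoding rather than plain sampling, and setting `pad_token_id` explicitly silences the warning transformers otherwise emits when a model has no pad token defined.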