Update app.py
Browse files
app.py
CHANGED
@@ -171,31 +171,30 @@ retriever = db.as_retriever(
|
|
171 |
|
172 |
|
173 |
|
174 |
-
|
175 |
load_in_8bit=True,
|
176 |
bnb_8bit_compute_dtype=torch.bfloat16,
|
177 |
bnb_8bit_quant_type="nf4",
|
178 |
bnb_8bit_use_double_quant=True
|
179 |
-
|
180 |
|
181 |
|
182 |
|
183 |
|
184 |
-
|
185 |
-
|
186 |
|
187 |
-
|
188 |
model_id,
|
189 |
torch_dtype=torch.float16,
|
190 |
device_map="cuda",
|
191 |
attn_implementation="flash_attention_2",
|
192 |
quantization_config=quantization_config
|
|
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192 )
|
197 |
|
198 |
-
|
199 |
|
200 |
|
201 |
|
|
|
# --- Quantized model setup -------------------------------------------------
# NOTE(review): the previous version passed bnb_8bit_compute_dtype,
# bnb_8bit_quant_type="nf4", and bnb_8bit_use_double_quant to
# BitsAndBytesConfig.  Those parameter names do not exist (the real ones are
# the bnb_4bit_* variants, and "nf4" is a 4-bit quantization type), so
# transformers silently ignored them ("unused kwargs" warning) and the model
# loaded as plain 8-bit.  The invalid kwargs are removed here to keep the
# effective behavior while no longer misleading the reader.  If 4-bit NF4 was
# actually intended, use instead:
#   BitsAndBytesConfig(load_in_4bit=True,
#                      bnb_4bit_compute_dtype=torch.bfloat16,
#                      bnb_4bit_quant_type="nf4",
#                      bnb_4bit_use_double_quant=True)
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
)

# Checkpoint to serve; the tokenizer must come from the same checkpoint.
model_id = "mistralai/Mistral-Nemo-Instruct-2407"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the causal LM with 8-bit weights on the GPU.
# NOTE(review): attn_implementation="flash_attention_2" requires the
# flash-attn package and an Ampere-or-newer GPU — confirm the deployment
# environment provides both.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # dtype for the non-quantized modules
    device_map="cuda",
    attn_implementation="flash_attention_2",
    quantization_config=quantization_config,
)

# Text-generation pipeline; max_new_tokens caps the length of each response.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192)

# LangChain wrapper so the pipeline can be plugged into chains as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)