Daemontatox committed
Commit 64f4771 · verified · 1 Parent(s): 2c311d4

Update app.py

Files changed (1): app.py (+8 -9)
app.py CHANGED
@@ -171,31 +171,30 @@ retriever = db.as_retriever(
 
 
 
-quantization_config = BitsAndBytesConfig(
+quantization_config = BitsAndBytesConfig(
     load_in_8bit=True,
     bnb_8bit_compute_dtype=torch.bfloat16,
     bnb_8bit_quant_type="nf4",
     bnb_8bit_use_double_quant=True
-)
+)
 
 
 
 
-model_id = "mistralai/Mistral-Nemo-Instruct-2407"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+model_id = "mistralai/Mistral-Nemo-Instruct-2407"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-model = AutoModelForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     device_map="cuda",
     attn_implementation="flash_attention_2",
     quantization_config=quantization_config
-
-)
+)
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192 )
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192 )
 
-llm = HuggingFacePipeline(pipeline=pipe)
+llm = HuggingFacePipeline(pipeline=pipe)
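
One detail in the committed hunk is worth flagging: the BitsAndBytesConfig mixes the 8-bit and 4-bit option families. "nf4" is a 4-bit quantization type, and the documented keyword arguments are spelled bnb_4bit_* (there is no bnb_8bit_quant_type), so with load_in_8bit=True the model loads as plain int8 and the other three settings go unused. A minimal sketch of an internally consistent 4-bit NF4 setup, assuming a recent transformers release (device_map="auto" and the comments are my additions, not the repo's):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit NF4 with double quantization; matmuls computed in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # "nf4" is a 4-bit quant type
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,         # also quantize the quant constants
)

model_id = "mistralai/Mistral-Nemo-Instruct-2407"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",                      # let accelerate place the weights
    attn_implementation="flash_attention_2",
    quantization_config=quantization_config,
)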
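On the LangChain side, the wrapper consumes a ready transformers pipeline. A hedged sketch of that wiring, assuming the langchain-huggingface package (the import paths are my assumption; app.py's own imports are not visible in this hunk):

from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline  # assumed import path

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192)
llm = HuggingFacePipeline(pipeline=pipe)  # LangChain LLM backed by the local pipeline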