Daemontatox committed on
Commit
6c8aa67
·
verified ·
1 Parent(s): 632dfa0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -34
app.py CHANGED
@@ -151,16 +151,16 @@ retriever = db.as_retriever(
151
  # )
152
 
153
 
154
- #llm = ChatOpenAI(
155
- # base_url="https://openrouter.ai/api/v1",
156
- #temperature=0.01,
157
- # api_key=OPENAPI_KEY,
158
- #model="google/gemini-2.0-flash-exp:free",
159
- #max_tokens=None,
160
- #timeout=None,
161
- # max_retries=3,
162
 
163
- #)
164
 
165
 
166
  # llm = ChatCerebras(
@@ -171,31 +171,6 @@ retriever = db.as_retriever(
171
 
172
 
173
 
174
- quantization_config = BitsAndBytesConfig(
175
- load_in_4bit=True,
176
- bnb_4bit_compute_dtype=torch.bfloat16,
177
- bnb_4bit_quant_type="nf4",
178
- bnb_4bit_use_double_quant=True
179
- )
180
-
181
-
182
-
183
-
184
- model_id = "meta-llama/Llama-3.2-3B-Instruct"
185
- tokenizer = AutoTokenizer.from_pretrained(model_id)
186
-
187
- model = AutoModelForCausalLM.from_pretrained(
188
- model_id,
189
- torch_dtype=torch.float16,
190
- device_map="cuda",
191
- attn_implementation="flash_attention_2",
192
- #quantization_config=quantization_config
193
- )
194
-
195
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192 )
196
-
197
- llm = HuggingFacePipeline(pipeline=pipe)
198
-
199
 
200
 
201
 
 
151
  # )
152
 
153
 
154
+ llm = ChatOpenAI(
155
+ base_url="https://openrouter.ai/api/v1",
156
+ temperature=0.01,
157
+ api_key=OPENAPI_KEY,
158
+ model="google/gemini-2.0-flash-exp:free",
159
+ max_tokens=None,
160
+ timeout=None,
161
+ max_retries=3,
162
 
163
+ )
164
 
165
 
166
  # llm = ChatCerebras(
 
171
 
172
 
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
 
176