Daemontatox committed
Commit d8573fe · verified · 1 Parent(s): ec56115

Update app.py

Files changed (1): app.py (+26 −26)
app.py CHANGED
@@ -121,41 +121,41 @@ retriever = db.as_retriever(
 
 
 # Set up the LLM
-# llm = ChatOpenAI(
-#     base_url="https://api-inference.huggingface.co/v1/",
-#     temperature=0,
-#     api_key=HF_TOKEN,
-#     model="meta-llama/Llama-3.3-70B-Instruct",
-#     max_tokens=None,
-#     timeout=None
+llm = ChatOpenAI(
+    base_url="https://api-inference.huggingface.co/v1/",
+    temperature=0,
+    api_key=HF_TOKEN,
+    model="mistralai/Mistral-Nemo-Instruct-2407",
+    max_tokens=None,
+    timeout=None
 
-# )
+)
 
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True
-)
+# quantization_config = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_compute_dtype=torch.bfloat16,
+#     bnb_4bit_quant_type="nf4",
+#     bnb_4bit_use_double_quant=True
+# )
 
 
 
 
-model_id = "unsloth/phi-4"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+# model_id = "unsloth/phi-4"
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    device_map="cuda",
-    attn_implementation="flash_attention_2",
-    quantization_config=quantization_config
+# model = AutoModelForCausalLM.from_pretrained(
+#     model_id,
+#     torch_dtype=torch.float16,
+#     device_map="cuda",
+#     attn_implementation="flash_attention_2",
+#     quantization_config=quantization_config
 
-)
+# )
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192 )
+# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192 )
 
-llm = HuggingFacePipeline(pipeline=pipe)
+# llm = HuggingFacePipeline(pipeline=pipe)
 
 
 
@@ -201,7 +201,7 @@ def create_rag_chain(chat_history: str):
 chat_history = ChatHistory()
 
 # Gradio Function
-@spaces.GPU()
+# @spaces.GPU()
 def ask_question_gradio(question, history):
     try:
         # Add user question to chat history
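The net effect of this commit is that `llm` is now served by Hugging Face's OpenAI-compatible serverless Inference API (`mistralai/Mistral-Nemo-Instruct-2407`) instead of an in-process 4-bit quantized phi-4 pipeline. A minimal standalone sketch of the path being enabled, assuming `ChatOpenAI` comes from the `langchain_openai` package and that `HF_TOKEN` is supplied via the environment (app.py defines `HF_TOKEN` elsewhere):

```python
import os
from langchain_openai import ChatOpenAI

# Assumption: the token is provided as a Space secret / env var.
HF_TOKEN = os.environ["HF_TOKEN"]

llm = ChatOpenAI(
    base_url="https://api-inference.huggingface.co/v1/",  # HF's OpenAI-compatible endpoint
    api_key=HF_TOKEN,
    model="mistralai/Mistral-Nemo-Instruct-2407",
    temperature=0,
)

# invoke() issues a single chat-completion request and returns an AIMessage.
reply = llm.invoke("Summarize retrieval-augmented generation in one sentence.")
print(reply.content)
```

Because generation now happens server-side, the `BitsAndBytesConfig`, tokenizer, and `HuggingFacePipeline` wiring can stay commented out.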
 
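Commenting out `@spaces.GPU()` follows from the same change: the decorator only matters when the model executes in-process on a ZeroGPU Space, and with the hosted API there is no local CUDA work to reserve. A short sketch of what the handler would need again if the local pipeline were re-enabled (assuming the standard `spaces` package available on ZeroGPU Spaces):

```python
import spaces

@spaces.GPU()  # reserves a ZeroGPU slice for the duration of each call
def ask_question_gradio(question, history):
    ...  # local-model inference would run here
```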