Update app.py
app.py CHANGED
@@ -121,41 +121,41 @@ retriever = db.as_retriever(
 
 
 # Set up the LLM
+llm = ChatOpenAI(
+    base_url="https://api-inference.huggingface.co/v1/",
+    temperature=0,
+    api_key=HF_TOKEN,
+    model="mistralai/Mistral-Nemo-Instruct-2407",
+    max_tokens=None,
+    timeout=None
 
+)
 
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True
-)
+# quantization_config = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_compute_dtype=torch.bfloat16,
+#     bnb_4bit_quant_type="nf4",
+#     bnb_4bit_use_double_quant=True
+# )
 
 
 
 
-model_id = "unsloth/phi-4"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+# model_id = "unsloth/phi-4"
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    device_map="cuda",
-    attn_implementation="flash_attention_2",
-    quantization_config=quantization_config
+# model = AutoModelForCausalLM.from_pretrained(
+#     model_id,
+#     torch_dtype=torch.float16,
+#     device_map="cuda",
+#     attn_implementation="flash_attention_2",
+#     quantization_config=quantization_config
 
-)
+# )
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192)
+# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=8192)
 
-llm = HuggingFacePipeline(pipeline=pipe)
+# llm = HuggingFacePipeline(pipeline=pipe)
 
 
 
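In effect, this hunk swaps local GPU inference (4-bit-quantized unsloth/phi-4 through transformers) for Hugging Face's hosted, OpenAI-compatible Inference API. A minimal standalone sketch of the new path follows; the langchain_openai import and the os.environ lookup for HF_TOKEN are assumptions, since the app's import block and token handling are outside this diff.

# Sketch only: the remote-inference setup this commit switches to.
# Assumes the langchain-openai package is installed and that a Hugging Face
# access token is exported as the HF_TOKEN environment variable.
import os

from langchain_openai import ChatOpenAI

HF_TOKEN = os.environ["HF_TOKEN"]  # assumed token source

# The serverless Inference API exposes an OpenAI-compatible /v1 route, so the
# stock LangChain OpenAI chat wrapper can target it through base_url.
llm = ChatOpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=HF_TOKEN,
    model="mistralai/Mistral-Nemo-Instruct-2407",
    temperature=0,    # deterministic answers for retrieval QA
    max_tokens=None,  # defer to the server's limit
    timeout=None,     # no client-side timeout
)

print(llm.invoke("Reply with a one-sentence greeting.").content)

The trade-off is hosted-endpoint latency and rate limits in exchange for dropping the bitsandbytes and flash-attention GPU stack entirely.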
@@ -201,7 +201,7 @@ def create_rag_chain(chat_history: str):
 chat_history = ChatHistory()
 
 # Gradio Function
-@spaces.GPU()
+# @spaces.GPU()
 def ask_question_gradio(question, history):
     try:
         # Add user question to chat history
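The second hunk follows from the first: with no model loaded on-device, the Gradio handler no longer needs a ZeroGPU allocation, so only the decorator is commented out and the function itself is untouched. A sketch of the retired pattern, assuming the app runs as a ZeroGPU Space with the spaces package installed (answer_locally and its body are placeholders, not the app's real handler):

# Sketch only: how @spaces.GPU() was used before this commit.
import spaces

@spaces.GPU()  # on a ZeroGPU Space, borrows a GPU for the duration of the call
def answer_locally(question: str) -> str:
    # placeholder: the app previously ran its local HuggingFacePipeline here
    return f"answered on GPU: {question}"

# After this commit the handler is a plain CPU function; the heavy lifting
# happens on the Inference API instead.
print(answer_locally("hello"))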