Update app.py
app.py CHANGED
@@ -151,7 +151,6 @@ def finetune_small_subset():
 
     return "Finetuning complete. Model loaded for inference."
 
-
 def ensure_pipeline():
     """
     If we haven't finetuned yet (TEXT_PIPELINE is None),
@@ -178,7 +177,6 @@ def ensure_pipeline():
     TEXT_PIPELINE = pipeline("text-generation", model=base_model, tokenizer=tokenizer)
     return TEXT_PIPELINE
 
-
 def ensure_comparison_pipeline():
     """
     Load the DeepSeek model pipeline if not already loaded.
@@ -195,7 +193,6 @@ def ensure_comparison_pipeline():
     COMPARISON_PIPELINE = pipeline("text-generation", model=model, tokenizer=tokenizer)
     return COMPARISON_PIPELINE
 
-
 @spaces.GPU(duration=120)
 def predict(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
     """
@@ -212,7 +209,6 @@ def predict(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
     )
     return out[0]["generated_text"]
 
-
 @spaces.GPU(duration=120)
 def compare_models(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
     """
@@ -239,7 +235,6 @@ def compare_models(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
     )
     return local_out[0]["generated_text"], comp_out[0]["generated_text"]
 
-
 ###############################################################################
 # Retrieval-Augmented Memory with FAISS
 ###############################################################################
@@ -300,7 +295,6 @@ class ConversationRetriever:
             results.append((self.texts[idx], dist))
         return results
 
-
 ###############################################################################
 # Build a Chat that uses RAG
 ###############################################################################
@@ -325,7 +319,6 @@ def build_rag_prompt(user_query, retrieved_chunks):
     )
     return prompt
 
-
 @spaces.GPU(duration=120)
 def chat_rag(user_input, history, temperature, top_p, min_new_tokens, max_new_tokens):
     """
@@ -373,7 +366,6 @@ def chat_rag(user_input, history, temperature, top_p, min_new_tokens, max_new_to
     history.append([user_input, assistant_reply])
     return history, history
 
-
 ###############################################################################
 # Gradio UI
 ###############################################################################
@@ -438,4 +430,4 @@ with gr.Blocks() as demo:
         outputs=[chat_state, chatbot]
     )
 
-demo.launch()
+demo.launch()
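For context on what the first two hunks touch: ensure_pipeline() and ensure_comparison_pipeline() follow a lazy-initialization pattern, building a transformers pipeline on first use and caching it in a module-level global. A minimal sketch of that pattern; the "gpt2" checkpoint is a placeholder, since the actual base model loaded in app.py is not visible in this diff:

from transformers import pipeline

TEXT_PIPELINE = None

def ensure_pipeline():
    """Build the text-generation pipeline once, then reuse the cached one."""
    global TEXT_PIPELINE
    if TEXT_PIPELINE is None:
        # Placeholder checkpoint; app.py loads its own base model and tokenizer.
        TEXT_PIPELINE = pipeline("text-generation", model="gpt2")
    return TEXT_PIPELINE

Caching in a global keeps repeated Gradio callbacks from reloading model weights on every request, which matters under the @spaces.GPU(duration=120) time budget.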
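The "Retrieval-Augmented Memory with FAISS" section implies a ConversationRetriever whose search() yields (text, distance) pairs, as the hunk at old line 300 shows. A minimal sketch under stated assumptions: the embedding model, the IndexFlatL2 index type, and the add_text() helper are illustrative, not taken from app.py:

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

class ConversationRetriever:
    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        self.embedder = SentenceTransformer(model_name)
        self.dim = self.embedder.get_sentence_embedding_dimension()
        self.index = faiss.IndexFlatL2(self.dim)  # exact L2 nearest-neighbor search
        self.texts = []

    def add_text(self, text):
        # Embed the chunk and keep the raw text alongside the index position.
        vec = self.embedder.encode([text]).astype(np.float32)
        self.index.add(vec)
        self.texts.append(text)

    def search(self, query, top_k=3):
        vec = self.embedder.encode([query]).astype(np.float32)
        dists, idxs = self.index.search(vec, top_k)
        results = []
        for dist, idx in zip(dists[0], idxs[0]):
            if idx != -1:  # FAISS pads missing results with -1
                results.append((self.texts[idx], float(dist)))
        return results

IndexFlatL2 does exact search, which is a reasonable choice at conversation scale; an approximate index only pays off with far more stored chunks.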
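build_rag_prompt() then folds those retrieved chunks into the prompt handed to the generation pipeline. Its exact template is not part of this diff, so the wording below is purely illustrative:

def build_rag_prompt(user_query, retrieved_chunks):
    # retrieved_chunks is a list of (text, distance) pairs from the retriever.
    context = "\n".join(
        f"- {text} (distance: {dist:.2f})" for text, dist in retrieved_chunks
    )
    prompt = (
        "Relevant prior conversation:\n"
        f"{context}\n\n"
        f"User: {user_query}\n"
        "Assistant:"
    )
    return prompt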