Spaces:
Build error
Build error
move to phi 3 mini 4k instruct
Browse files
- agent.py +18 -23
- requirements.txt +3 -1
agent.py
CHANGED
|
@@ -28,6 +28,8 @@ from typing import TypedDict, Annotated, List, Union
|
|
| 28 |
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, BaseMessage
|
| 29 |
import operator
|
| 30 |
import json
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# (Keep Constants as is)
|
| 33 |
# --- Constants ---
|
|
@@ -249,19 +251,19 @@ class BasicAgent:
|
|
| 249 |
"Warning: HUGGING_FACE_HUB_TOKEN secret not found. This will fail for gated models."
|
| 250 |
)
|
| 251 |
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
self.
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
)
|
| 266 |
|
| 267 |
tools = [
|
|
@@ -325,12 +327,11 @@ The "action" must be one of [{tool_names}].
|
|
| 325 |
If you have the final answer, respond with:
|
| 326 |
FINAL ANSWER: [your answer]
|
| 327 |
|
| 328 |
-
If the output of a tool is the final answer, just repeat it in the final answer format
|
| 329 |
<|user|>
|
| 330 |
-
Conversation history:
|
| 331 |
{agent_scratchpad}
|
| 332 |
|
| 333 |
-
Question: {input}
|
| 334 |
<|assistant|>
|
| 335 |
'''
|
| 336 |
prompt = PromptTemplate.from_template(prompt_template_str).partial(
|
|
@@ -347,13 +348,7 @@ Question: {input}</s>
|
|
| 347 |
|
| 348 |
def llm_wrapper(prompt_value):
|
| 349 |
prompt_str = prompt_value.to_string()
|
| 350 |
-
|
| 351 |
-
prompt_str,
|
| 352 |
-
max_new_tokens=256,
|
| 353 |
-
)
|
| 354 |
-
generated_text = outputs[0]["generated_text"]
|
| 355 |
-
# Remove the prompt from the generated text
|
| 356 |
-
return generated_text[len(prompt_str):]
|
| 357 |
|
| 358 |
agent_runnable = (
|
| 359 |
RunnablePassthrough.assign(
|
|
|
|
| 28 |
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, BaseMessage
|
| 29 |
import operator
|
| 30 |
import json
|
| 31 |
+
from langchain_community.llms import LlamaCpp
|
| 32 |
+
from huggingface_hub import hf_hub_download
|
| 33 |
|
| 34 |
# (Keep Constants as is)
|
| 35 |
# --- Constants ---
|
|
|
|
| 251 |
"Warning: HUGGING_FACE_HUB_TOKEN secret not found. This will fail for gated models."
|
| 252 |
)
|
| 253 |
|
| 254 |
+
model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
|
| 255 |
+
model_file = "Phi-3-mini-4k-instruct-q4.gguf"
|
| 256 |
+
model_path = f"./{model_file}"
|
| 257 |
+
|
| 258 |
+
if not os.path.exists(model_path):
|
| 259 |
+
print(f"Downloading model to {model_path}...")
|
| 260 |
+
hf_hub_download(repo_id=model_name, filename=model_file, local_dir=".")
|
| 261 |
+
|
| 262 |
+
self.llm = LlamaCpp(
|
| 263 |
+
model_path=model_path,
|
| 264 |
+
n_ctx=4096, # Context window size
|
| 265 |
+
n_gpu_layers=0, # Set to 0 to use CPU only
|
| 266 |
+
verbose=True, # For debugging
|
| 267 |
)
|
| 268 |
|
| 269 |
tools = [
|
|
|
|
| 327 |
If you have the final answer, respond with:
|
| 328 |
FINAL ANSWER: [your answer]
|
| 329 |
|
| 330 |
+
If the output of a tool is the final answer, just repeat it in the final answer format.<|end|>
|
| 331 |
<|user|>
|
|
|
|
| 332 |
{agent_scratchpad}
|
| 333 |
|
| 334 |
+
Question: {input}<|end|>
|
| 335 |
<|assistant|>
|
| 336 |
'''
|
| 337 |
prompt = PromptTemplate.from_template(prompt_template_str).partial(
|
|
|
|
| 348 |
|
| 349 |
def llm_wrapper(prompt_value):
|
| 350 |
prompt_str = prompt_value.to_string()
|
| 351 |
+
return self.llm.invoke(prompt_str)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
agent_runnable = (
|
| 354 |
RunnablePassthrough.assign(
|
requirements.txt
CHANGED
|
@@ -23,4 +23,6 @@ torchaudio
|
|
| 23 |
torchvision
|
| 24 |
av
|
| 25 |
ffmpeg-python
|
| 26 |
-
Pillow
|
|
|
|
|
|
|
|
|
| 23 |
torchvision
|
| 24 |
av
|
| 25 |
ffmpeg-python
|
| 26 |
+
Pillow
|
| 27 |
+
llama-cpp-python
|
| 28 |
+
huggingface_hub
|