leofltt committed
Commit 4966493 · 1 Parent(s): e5def8a

move to phi 3 mini 4k i

Files changed (2):
  1. agent.py +18 -23
  2. requirements.txt +3 -1
agent.py CHANGED
```diff
@@ -28,6 +28,8 @@ from typing import TypedDict, Annotated, List, Union
 from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, BaseMessage
 import operator
 import json
+from langchain_community.llms import LlamaCpp
+from huggingface_hub import hf_hub_download
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -249,19 +251,19 @@ class BasicAgent:
                 "Warning: HUGGING_FACE_HUB_TOKEN secret not found. This will fail for gated models."
             )
 
-        # Set device for pipeline
-        device = "cpu"
-        if torch.cuda.is_available():
-            device = "cuda"
-        elif torch.backends.mps.is_available():
-            device = "mps"
-        print(f"Device set to use {device}")
-
-        self.pipeline = pipeline(
-            "text-generation",
-            model="Qwen/Qwen1.5-7B-Chat",
-            dtype=torch.bfloat16,
-            device=device,
+        model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
+        model_file = "Phi-3-mini-4k-instruct-q4.gguf"
+        model_path = f"./{model_file}"
+
+        if not os.path.exists(model_path):
+            print(f"Downloading model to {model_path}...")
+            hf_hub_download(repo_id=model_name, filename=model_file, local_dir=".")
+
+        self.llm = LlamaCpp(
+            model_path=model_path,
+            n_ctx=4096,      # Context window size
+            n_gpu_layers=0,  # Set to 0 to use CPU only
+            verbose=True,    # For debugging
         )
 
         tools = [
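```

The hunk above swaps the transformers `pipeline` loading Qwen1.5-7B-Chat in bfloat16 for a 4-bit GGUF checkpoint served by llama-cpp-python, which runs on CPU alone; presumably that is the point of the move for a Space without a GPU. A minimal standalone sketch of the same loading pattern, assuming `llama-cpp-python` and `huggingface_hub` are installed (repo and file names are the ones used in the diff):

```python
# Minimal sketch of the new loading path, using the diff's repo/file names.
import os

from huggingface_hub import hf_hub_download
from langchain_community.llms import LlamaCpp

model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
model_file = "Phi-3-mini-4k-instruct-q4.gguf"
model_path = f"./{model_file}"

# One-time download of the quantized weights; kept on disk afterwards.
if not os.path.exists(model_path):
    hf_hub_download(repo_id=model_name, filename=model_file, local_dir=".")

llm = LlamaCpp(
    model_path=model_path,
    n_ctx=4096,      # Phi-3-mini-4k's full context window
    n_gpu_layers=0,  # CPU-only; raise to offload layers when a GPU is present
    verbose=True,
)

# LlamaCpp is a plain LangChain LLM: invoke() takes a string, returns a string.
print(llm.invoke("<|user|>\nWhat is 2 + 2?<|end|>\n<|assistant|>\n"))
```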
```diff
@@ -325,12 +327,11 @@ The "action" must be one of [{tool_names}].
 If you have the final answer, respond with:
 FINAL ANSWER: [your answer]
 
-If the output of a tool is the final answer, just repeat it in the final answer format.</s>
+If the output of a tool is the final answer, just repeat it in the final answer format.<|end|>
 <|user|>
-Conversation history:
 {agent_scratchpad}
 
-Question: {input}</s>
+Question: {input}<|end|>
 <|assistant|>
 '''
         prompt = PromptTemplate.from_template(prompt_template_str).partial(
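```

The prompt markers change with the model: the `</s>` end-of-sequence token gives way to Phi-3's `<|end|>` end-of-turn marker, with `<|user|>` / `<|assistant|>` framing each turn. A sketch of the format the edited template follows; the helper below is illustrative and not part of the commit, and the `<|system|>` header is in Phi-3's token set even though the diff excerpt does not show how its template opens:

```python
# Illustrative helper, not part of the commit: builds a Phi-3-style prompt.
# Each turn ends with <|end|>; a trailing <|assistant|> primes the reply.
def phi3_prompt(system: str, user: str) -> str:
    return (
        f"<|system|>\n{system}<|end|>\n"
        f"<|user|>\n{user}<|end|>\n"
        f"<|assistant|>\n"
    )

print(phi3_prompt("You are a helpful agent.", "What tools can you call?"))
```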
```diff
@@ -347,13 +348,7 @@ Question: {input}</s>
 
         def llm_wrapper(prompt_value):
             prompt_str = prompt_value.to_string()
-            outputs = self.pipeline(
-                prompt_str,
-                max_new_tokens=256,
-            )
-            generated_text = outputs[0]["generated_text"]
-            # Remove the prompt from the generated text
-            return generated_text[len(prompt_str):]
+            return self.llm.invoke(prompt_str)
 
         agent_runnable = (
             RunnablePassthrough.assign(
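```

The wrapper collapses to a single `invoke` call because the transformers text-generation pipeline echoes the prompt back in `generated_text` (hence the old slicing), while `LlamaCpp.invoke` returns only the completion. A simplified sketch of how such a string-to-string wrapper composes with a prompt, assuming the GGUF file from the earlier sketch is already on disk; this is not the agent's actual runnable graph:

```python
# Simplified sketch, not the agent's real chain: a plain str -> str wrapper
# composed with a PromptTemplate via RunnableLambda.
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda

llm = LlamaCpp(model_path="./Phi-3-mini-4k-instruct-q4.gguf", n_ctx=4096)

prompt = PromptTemplate.from_template(
    "<|user|>\nQuestion: {input}<|end|>\n<|assistant|>\n"
)

def llm_wrapper(prompt_value):
    # LlamaCpp.invoke returns only the completion, so the old
    # prompt-stripping slice is no longer needed.
    return llm.invoke(prompt_value.to_string())

chain = prompt | RunnableLambda(llm_wrapper)
print(chain.invoke({"input": "What is the capital of France?"}))
```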
requirements.txt CHANGED
```diff
@@ -23,4 +23,6 @@ torchaudio
 torchvision
 av
 ffmpeg-python
-Pillow
+Pillow
+llama-cpp-python
+huggingface_hub
```
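The two new dependencies cover inference (`llama-cpp-python`, imported as `llama_cpp`) and the weight download (`huggingface_hub`, likely already present transitively via transformers but now a direct dependency). A quick smoke test, assuming both install cleanly:

```python
# Sanity check that the new requirements resolve to importable packages.
import llama_cpp        # pip package: llama-cpp-python
import huggingface_hub  # pip package: huggingface_hub

print("llama-cpp-python:", llama_cpp.__version__)
print("huggingface_hub:", huggingface_hub.__version__)
```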