leofltt committed
Commit 4966493 · 1 Parent(s): e5def8a

move to phi 3 mini 4k i

Files changed (2):
  1. agent.py +18 -23
  2. requirements.txt +3 -1
agent.py CHANGED
```diff
@@ -28,6 +28,8 @@ from typing import TypedDict, Annotated, List, Union
 from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, BaseMessage
 import operator
 import json
+from langchain_community.llms import LlamaCpp
+from huggingface_hub import hf_hub_download
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -249,19 +251,19 @@ class BasicAgent:
                 "Warning: HUGGING_FACE_HUB_TOKEN secret not found. This will fail for gated models."
             )
 
-        # Set device for pipeline
-        device = "cpu"
-        if torch.cuda.is_available():
-            device = "cuda"
-        elif torch.backends.mps.is_available():
-            device = "mps"
-        print(f"Device set to use {device}")
-
-        self.pipeline = pipeline(
-            "text-generation",
-            model="Qwen/Qwen1.5-7B-Chat",
-            dtype=torch.bfloat16,
-            device=device,
+        model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
+        model_file = "Phi-3-mini-4k-instruct-q4.gguf"
+        model_path = f"./{model_file}"
+
+        if not os.path.exists(model_path):
+            print(f"Downloading model to {model_path}...")
+            hf_hub_download(repo_id=model_name, filename=model_file, local_dir=".")
+
+        self.llm = LlamaCpp(
+            model_path=model_path,
+            n_ctx=4096,      # Context window size
+            n_gpu_layers=0,  # Set to 0 to use CPU only
+            verbose=True,    # For debugging
         )
 
         tools = [
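```

The hunk above swaps the transformers `pipeline` loading Qwen1.5-7B-Chat in bfloat16 for a 4-bit GGUF checkpoint served by llama-cpp-python, which runs on CPU alone; presumably that is the point of the move for a Space without a GPU. A minimal standalone sketch of the same loading pattern, assuming `llama-cpp-python` and `huggingface_hub` are installed (repo and file names are the ones used in the diff):

```python
# Minimal sketch of the new loading path, using the diff's repo/file names.
import os

from huggingface_hub import hf_hub_download
from langchain_community.llms import LlamaCpp

model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
model_file = "Phi-3-mini-4k-instruct-q4.gguf"
model_path = f"./{model_file}"

# One-time download of the quantized weights; kept on disk afterwards.
if not os.path.exists(model_path):
    hf_hub_download(repo_id=model_name, filename=model_file, local_dir=".")

llm = LlamaCpp(
    model_path=model_path,
    n_ctx=4096,      # Phi-3-mini-4k's full context window
    n_gpu_layers=0,  # CPU-only; raise to offload layers when a GPU is present
    verbose=True,
)

# LlamaCpp is a plain LangChain LLM: invoke() takes a string, returns a string.
print(llm.invoke("<|user|>\nWhat is 2 + 2?<|end|>\n<|assistant|>\n"))
```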
```diff
@@ -325,12 +327,11 @@ The "action" must be one of [{tool_names}].
 If you have the final answer, respond with:
 FINAL ANSWER: [your answer]
 
-If the output of a tool is the final answer, just repeat it in the final answer format.</s>
+If the output of a tool is the final answer, just repeat it in the final answer format.<|end|>
 <|user|>
-Conversation history:
 {agent_scratchpad}
 
-Question: {input}</s>
+Question: {input}<|end|>
 <|assistant|>
 '''
         prompt = PromptTemplate.from_template(prompt_template_str).partial(
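```

The prompt markers change with the model: the `</s>` end-of-sequence token gives way to Phi-3's `<|end|>` end-of-turn marker, with `<|user|>` / `<|assistant|>` framing each turn. A sketch of the format the edited template follows; the helper below is illustrative and not part of the commit, and the `<|system|>` header is in Phi-3's token set even though the diff excerpt does not show how its template opens:

```python
# Illustrative helper, not part of the commit: builds a Phi-3-style prompt.
# Each turn ends with <|end|>; a trailing <|assistant|> primes the reply.
def phi3_prompt(system: str, user: str) -> str:
    return (
        f"<|system|>\n{system}<|end|>\n"
        f"<|user|>\n{user}<|end|>\n"
        f"<|assistant|>\n"
    )

print(phi3_prompt("You are a helpful agent.", "What tools can you call?"))
```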
```diff
@@ -347,13 +348,7 @@ Question: {input}</s>
 
         def llm_wrapper(prompt_value):
             prompt_str = prompt_value.to_string()
-            outputs = self.pipeline(
-                prompt_str,
-                max_new_tokens=256,
-            )
-            generated_text = outputs[0]["generated_text"]
-            # Remove the prompt from the generated text
-            return generated_text[len(prompt_str):]
+            return self.llm.invoke(prompt_str)
 
         agent_runnable = (
             RunnablePassthrough.assign(
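```

The wrapper collapses to a single `invoke` call because the transformers text-generation pipeline echoes the prompt back in `generated_text` (hence the old slicing), while `LlamaCpp.invoke` returns only the completion. A simplified sketch of how such a string-to-string wrapper composes with a prompt, assuming the GGUF file from the earlier sketch is already on disk; this is not the agent's actual runnable graph:

```python
# Simplified sketch, not the agent's real chain: a plain str -> str wrapper
# composed with a PromptTemplate via RunnableLambda.
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda

llm = LlamaCpp(model_path="./Phi-3-mini-4k-instruct-q4.gguf", n_ctx=4096)

prompt = PromptTemplate.from_template(
    "<|user|>\nQuestion: {input}<|end|>\n<|assistant|>\n"
)

def llm_wrapper(prompt_value):
    # LlamaCpp.invoke returns only the completion, so the old
    # prompt-stripping slice is no longer needed.
    return llm.invoke(prompt_value.to_string())

chain = prompt | RunnableLambda(llm_wrapper)
print(chain.invoke({"input": "What is the capital of France?"}))
```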
requirements.txt CHANGED
```diff
@@ -23,4 +23,6 @@ torchaudio
 torchvision
 av
 ffmpeg-python
-Pillow
+Pillow
+llama-cpp-python
+huggingface_hub
```
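The two new dependencies cover inference (`llama-cpp-python`, imported as `llama_cpp`) and the weight download (`huggingface_hub`, likely already present transitively via transformers but now a direct dependency). A quick smoke test, assuming both install cleanly:

```python
# Sanity check that the new requirements resolve to importable packages.
import llama_cpp        # pip package: llama-cpp-python
import huggingface_hub  # pip package: huggingface_hub

print("llama-cpp-python:", llama_cpp.__version__)
print("huggingface_hub:", huggingface_hub.__version__)
```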