LamiaYT committed on
Commit
e0860a0
·
1 Parent(s): 8ac5ef4

Fix quantization dependencies and add fallback

Files changed (4)
  1. README.md +1 -0
  2. agent/local_llm.py +25 -1
  3. app.py +8 -3
  4. requirements.txt +10 -9
README.md CHANGED
@@ -11,3 +11,4 @@ short_description: Test To Pass GAIA
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
agent/local_llm.py CHANGED
@@ -1,5 +1,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
+from accelerate import Accelerator
 
 class LocalLLM:
     def __init__(self):
@@ -8,12 +9,35 @@ class LocalLLM:
         self.pipeline = self._load_model()
 
     def _load_model(self):
+        try:
+            # First try with 4-bit quantization
+            return self._load_quantized_model()
+        except Exception as e:
+            print(f"4-bit loading failed: {str(e)}. Trying without quantization...")
+            return self._load_fallback_model()
+
+    def _load_quantized_model(self):
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         model = AutoModelForCausalLM.from_pretrained(
             self.model_name,
             torch_dtype=torch.float16,
             device_map="auto",
-            load_in_4bit=True  # Quantization to save memory
+            load_in_4bit=True,
+            low_cpu_mem_usage=True
+        )
+        return pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            device=self.device
+        )
+
+    def _load_fallback_model(self):
+        tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
         return pipeline(
             "text-generation",
app.py CHANGED
@@ -5,9 +5,14 @@ from llama_index.core.agent import ReActAgent
 from utils.gaia_api import GaiaAPI
 
 # Initialize components
-llm = LocalLLM()
-agent = ReActAgent.from_tools(gaia_tools, llm=llm.pipeline)
-
+try:
+    from agent.local_llm import LocalLLM
+    llm = LocalLLM()
+    agent = ReActAgent.from_tools(gaia_tools, llm=llm.pipeline)
+except Exception as e:
+    print(f"Failed to initialize LLM: {str(e)}")
+    # Fallback to a simpler agent if needed
+    agent = None
 def process_question(question_text: str) -> str:
     """Process GAIA question through agent"""
     try:
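Because the except branch leaves agent = None, process_question presumably has to guard against a missing agent before dispatching. A minimal sketch of such a guard, assuming the agent is invoked via ReActAgent.chat() (the rest of the function body is not shown in this diff):

# Sketch only: guard the handler against the fallback case where agent is None.
# The .chat() call is an assumption about how the ReActAgent is invoked here.
def process_question(question_text: str) -> str:
    """Process GAIA question through agent"""
    if agent is None:
        return "LLM backend failed to initialize; cannot answer this question."
    try:
        response = agent.chat(question_text)
        return str(response)
    except Exception as e:
        return f"Agent error: {str(e)}"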
requirements.txt CHANGED
@@ -1,9 +1,10 @@
-llama-index==0.10.0
-transformers==4.34.0
-torch==2.0.1
-accelerate==0.23.0
-sentence-transformers==2.2.2
-python-dotenv==1.0.0
-gradio==3.41.0
-requests==2.31.0
-bitsandbytes==0.41.1
+accelerate>=0.23.0
+bitsandbytes>=0.41.1
+torch>=2.0.1
+transformers>=4.34.0
+llama-index>=0.10.0
+gradio>=3.41.0
+sentence-transformers>=2.2.2
+python-dotenv>=1.0.0
+requests>=2.31.0
+nltk>=3.8.1
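Moving from == pins to >= lower bounds lets pip resolve mutually compatible transformers/accelerate/bitsandbytes releases, which is the usual source of 4-bit loading failures. A small startup probe along these lines (illustrative helper name, not part of the commit) can decide up front whether the quantized path is worth attempting:

# Sketch only: check the quantization stack before trying a 4-bit load.
def quantization_available() -> bool:
    try:
        import bitsandbytes  # noqa: F401  (needed for load_in_4bit)
        import accelerate    # noqa: F401  (needed for device_map="auto")
        import torch
    except ImportError:
        return False
    # bitsandbytes 4-bit kernels require a CUDA device
    return torch.cuda.is_available()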