LamiaYT committed
Commit 2828102 · 1 Parent(s): e0860a0

Fix device mapping and improve error handling

Files changed (2):
  1. agent/local_llm.py  +14 -14
  2. app.py  +11 -3
agent/local_llm.py CHANGED
@@ -4,8 +4,7 @@ from accelerate import Accelerator
 
 class LocalLLM:
     def __init__(self):
-        self.model_name = "HuggingFaceH4/zephyr-7b-beta"
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Using smaller model
         self.pipeline = self._load_model()
 
     def _load_model(self):
@@ -13,7 +12,7 @@ class LocalLLM:
             # First try with 4-bit quantization
             return self._load_quantized_model()
         except Exception as e:
-            print(f"4-bit loading failed: {str(e)}. Trying without quantization...")
+            print(f"Quantized loading failed: {str(e)}. Trying without quantization...")
             return self._load_fallback_model()
 
     def _load_quantized_model(self):
@@ -28,8 +27,7 @@
         return pipeline(
             "text-generation",
             model=model,
-            tokenizer=tokenizer,
-            device=self.device
+            tokenizer=tokenizer  # Removed device parameter
         )
 
     def _load_fallback_model(self):
@@ -42,15 +40,17 @@
         return pipeline(
             "text-generation",
             model=model,
-            tokenizer=tokenizer,
-            device=self.device
+            tokenizer=tokenizer  # Removed device parameter
         )
 
     def generate(self, prompt: str) -> str:
-        outputs = self.pipeline(
-            prompt,
-            max_new_tokens=256,
-            do_sample=True,
-            temperature=0.7
-        )
-        return outputs[0]['generated_text']
+        try:
+            outputs = self.pipeline(
+                prompt,
+                max_new_tokens=256,
+                do_sample=True,
+                temperature=0.7
+            )
+            return outputs[0]['generated_text']
+        except Exception as e:
+            return f"Error generating response: {str(e)}"
app.py CHANGED
@@ -6,13 +6,21 @@ from utils.gaia_api import GaiaAPI
 
 # Initialize components
 try:
-    from agent.local_llm import LocalLLM
     llm = LocalLLM()
     agent = ReActAgent.from_tools(gaia_tools, llm=llm.pipeline)
 except Exception as e:
-    print(f"Failed to initialize LLM: {str(e)}")
-    # Fallback to a simpler agent if needed
+    print(f"Agent initialization failed: {str(e)}")
     agent = None
+
+def process_question(question_text: str) -> str:
+    if not agent:
+        return "Agent initialization failed - please check logs"
+    try:
+        response = agent.query(question_text)
+        return str(response)
+    except Exception as e:
+        return f"Error processing question: {str(e)}"
+
 def process_question(question_text: str) -> str:
     """Process GAIA question through agent"""
     try:
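A quick, hypothetical usage sketch of the error handling added to app.py (not part of the commit): process_question now returns a readable error string instead of raising, both when the agent failed to initialize and when a query fails.

# Hypothetical illustration; assumes app.py is importable as the module `app`.
from app import process_question

answer = process_question("What is the capital of France?")
print(answer)  # the agent's answer, or an error message if initialization/query failed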