wakeupmh committed
Commit 8081db6 · 1 Parent(s): 97889da

fix: cpu usage
Files changed (2)
  1. app.py +9 -1
  2. requirements.txt +4 -1
app.py CHANGED
@@ -25,8 +25,13 @@ def load_local_model():
     model = AutoModelForSeq2SeqLM.from_pretrained(
         MODEL_PATH,
         torch_dtype=torch.float32,
-        device_map="auto"
+        low_cpu_mem_usage=True,
+        device_map=None  # Let PyTorch handle device placement
     )
+
+    # Move model to CPU explicitly
+    model = model.cpu()
+
     return model, tokenizer
 
 def fetch_arxiv_papers(query, max_results=5):
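For reference, a minimal sketch of how load_local_model() reads after this hunk. Only the from_pretrained call and the explicit CPU move are in the diff; the imports, the MODEL_PATH value, and the tokenizer line are assumptions filled in to make the example self-contained.

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_PATH = "google/flan-t5-base"  # assumption: the real value is not shown in the diff

def load_local_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)  # assumed; not shown in the hunk
    model = AutoModelForSeq2SeqLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,  # stream weights in rather than materializing a second copy in RAM
        device_map=None,         # skip accelerate's dispatch; let PyTorch handle device placement
    )
    # Move model to CPU explicitly
    model = model.cpu()
    return model, tokenizer

The likely reasoning behind the swap: device_map="auto" hands placement to accelerate's dispatcher, whose hooks add overhead on a CPU-only box, while an explicit .cpu() pins the whole model to one device up front.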
 
@@ -158,6 +163,9 @@ If the research doesn't address the question directly, explain what information
     # Generate response
     inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
 
+    # Move inputs to the same device as model
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
     with torch.inference_mode():
         outputs = model.generate(
             **inputs,
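Likewise, a sketch of the patched generation path. The tokenizer call, the device-aligning dict comprehension, and inference_mode come from the diff; prompt, the generate kwargs past **inputs, and the decode step are assumptions.

prompt = "Summarize the retrieved papers."  # assumption: built earlier in app.py

inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)

# Move inputs to the same device as model (a no-op here, since the model
# was pinned to CPU above, but it keeps tokenizer output and model in sync)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,  # assumed; the diff is truncated after **inputs
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))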
requirements.txt CHANGED
@@ -4,7 +4,10 @@ datasets>=2.17.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch>=2.2.0
 accelerate>=0.26.0
+safetensors>=0.4.1
 numpy>=1.24.0
 pandas>=2.2.0
 requests>=2.31.0
-arxiv>=2.1.0
+arxiv>=2.1.0
+lancedb>=0.3.3
+tantivy>=0.19.2
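A quick sanity check that the CPU-only wheel from the extra index URL is the one actually installed (a sketch; the exact version string will vary):

import torch

print(torch.__version__)          # e.g. "2.2.0+cpu" when pulled from the CPU wheel index
print(torch.cuda.is_available())  # False on a CPU-only build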