Spaces:
Sleeping
Sleeping
fix: cpu usage
Browse files
- app.py +9 -1
- requirements.txt +4 -1
app.py
CHANGED
@@ -25,8 +25,13 @@ def load_local_model():
|
|
25 |
model = AutoModelForSeq2SeqLM.from_pretrained(
|
26 |
MODEL_PATH,
|
27 |
torch_dtype=torch.float32,
|
28 |
-
|
|
|
29 |
)
|
|
|
|
|
|
|
|
|
30 |
return model, tokenizer
|
31 |
|
32 |
def fetch_arxiv_papers(query, max_results=5):
|
@@ -158,6 +163,9 @@ If the research doesn't address the question directly, explain what information
|
|
158 |
# Generate response
|
159 |
inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
|
160 |
|
|
|
|
|
|
|
161 |
with torch.inference_mode():
|
162 |
outputs = model.generate(
|
163 |
**inputs,
|
|
|
25 |
model = AutoModelForSeq2SeqLM.from_pretrained(
|
26 |
MODEL_PATH,
|
27 |
torch_dtype=torch.float32,
|
28 |
+
low_cpu_mem_usage=True,
|
29 |
+
device_map=None # Let PyTorch handle device placement
|
30 |
)
|
31 |
+
|
32 |
+
# Move model to CPU explicitly
|
33 |
+
model = model.cpu()
|
34 |
+
|
35 |
return model, tokenizer
|
36 |
|
37 |
def fetch_arxiv_papers(query, max_results=5):
|
|
|
163 |
# Generate response
|
164 |
inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
|
165 |
|
166 |
+
# Move inputs to the same device as model
|
167 |
+
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
168 |
+
|
169 |
with torch.inference_mode():
|
170 |
outputs = model.generate(
|
171 |
**inputs,
|
requirements.txt
CHANGED
@@ -4,7 +4,10 @@ datasets>=2.17.0
|
|
4 |
--extra-index-url https://download.pytorch.org/whl/cpu
|
5 |
torch>=2.2.0
|
6 |
accelerate>=0.26.0
|
|
|
7 |
numpy>=1.24.0
|
8 |
pandas>=2.2.0
|
9 |
requests>=2.31.0
|
10 |
-
arxiv>=2.1.0
|
|
|
|
|
|
4 |
--extra-index-url https://download.pytorch.org/whl/cpu
|
5 |
torch>=2.2.0
|
6 |
accelerate>=0.26.0
|
7 |
+
safetensors>=0.4.1
|
8 |
numpy>=1.24.0
|
9 |
pandas>=2.2.0
|
10 |
requests>=2.31.0
|
11 |
+
arxiv>=2.1.0
|
12 |
+
lancedb>=0.3.3
|
13 |
+
tantivy>=0.19.2
|