wakeupmh committed
Commit 8081db6 · 1 Parent(s): 97889da

fix: cpu usage
Files changed (2)
  1. app.py +9 -1
  2. requirements.txt +4 -1
app.py CHANGED
@@ -25,8 +25,13 @@ def load_local_model():
     model = AutoModelForSeq2SeqLM.from_pretrained(
         MODEL_PATH,
         torch_dtype=torch.float32,
-        device_map="auto"
+        low_cpu_mem_usage=True,
+        device_map=None  # Let PyTorch handle device placement
     )
+
+    # Move model to CPU explicitly
+    model = model.cpu()
+
     return model, tokenizer
 
 def fetch_arxiv_papers(query, max_results=5):
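For reference, a minimal sketch of how load_local_model() reads after this hunk. Only the from_pretrained call and the explicit CPU move are in the diff; the imports, the MODEL_PATH value, and the tokenizer line are assumptions filled in to make the example self-contained.

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_PATH = "google/flan-t5-base"  # assumption: the real value is not shown in the diff

def load_local_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)  # assumed; not shown in the hunk
    model = AutoModelForSeq2SeqLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,  # stream weights in rather than materializing a second copy in RAM
        device_map=None,         # skip accelerate's dispatch; let PyTorch handle device placement
    )
    # Move model to CPU explicitly
    model = model.cpu()
    return model, tokenizer

The likely reasoning behind the swap: device_map="auto" hands placement to accelerate's dispatcher, whose hooks add overhead on a CPU-only box, while an explicit .cpu() pins the whole model to one device up front.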
 
@@ -158,6 +163,9 @@ If the research doesn't address the question directly, explain what information
     # Generate response
     inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
 
+    # Move inputs to the same device as model
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
     with torch.inference_mode():
         outputs = model.generate(
             **inputs,
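Likewise, a sketch of the patched generation path. The tokenizer call, the device-aligning dict comprehension, and inference_mode come from the diff; prompt, the generate kwargs past **inputs, and the decode step are assumptions.

prompt = "Summarize the retrieved papers."  # assumption: built earlier in app.py

inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)

# Move inputs to the same device as model (a no-op here, since the model
# was pinned to CPU above, but it keeps tokenizer output and model in sync)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,  # assumed; the diff is truncated after **inputs
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))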
requirements.txt CHANGED
@@ -4,7 +4,10 @@ datasets>=2.17.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch>=2.2.0
 accelerate>=0.26.0
+safetensors>=0.4.1
 numpy>=1.24.0
 pandas>=2.2.0
 requests>=2.31.0
-arxiv>=2.1.0
+arxiv>=2.1.0
+lancedb>=0.3.3
+tantivy>=0.19.2
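A quick sanity check that the CPU-only wheel from the extra index URL is the one actually installed (a sketch; the exact version string will vary):

import torch

print(torch.__version__)          # e.g. "2.2.0+cpu" when pulled from the CPU wheel index
print(torch.cuda.is_available())  # False on a CPU-only build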