poltextlab committed on
Commit
f24c602
·
verified ·
1 Parent(s): 64f51f4

debug: disk space

Browse files
Files changed (1) hide show
  1. interfaces/ontolisst.py +30 -0
interfaces/ontolisst.py CHANGED
@@ -15,6 +15,26 @@ languages = [
15
 
16
  from label_dicts import ONTOLISST_LABEL_NAMES
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def build_huggingface_path(language: str):
20
  return "poltextlab/xlm-roberta-large_ontolisst_v1"
@@ -23,6 +43,16 @@ def predict(text, model_id, tokenizer_id):
23
  device = torch.device("cpu")
24
  model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto", offload_folder="offload", token=HF_TOKEN)
25
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
 
 
 
 
 
 
 
 
 
 
26
  model.to(device)
27
 
28
  inputs = tokenizer(text,
 
15
 
16
  from label_dicts import ONTOLISST_LABEL_NAMES
17
 
18
+ # --- DEBUG ---
19
+ import shutil
20
+
21
def convert_size(size):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size: Number of bytes (int or float).

    Returns:
        The value scaled to the largest fitting unit among B, KB, MB, GB, TB
        and PB, formatted with two decimals, e.g. ``"1.50 KB"``.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    # Step through every unit except the last one; stop as soon as the
    # value fits below the next 1024 boundary.
    for unit in units[:-1]:
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    # Anything that reaches this point is expressed in the largest unit.
    # Previously values of 1024 PB or more fell off the loop and the function
    # implicitly returned None.
    return f"{size:.2f} {units[-1]}"
26
+
27
def get_disk_space(path="/"):
    """Summarise disk usage for the filesystem that contains *path*.

    Args:
        path: Location to inspect; defaults to the root directory.

    Returns:
        A dict with the keys ``"Total"``, ``"Used"`` and ``"Free"``, each
        mapped to the corresponding ``shutil.disk_usage`` figure rendered
        by ``convert_size``.
    """
    usage = shutil.disk_usage(path)
    # disk_usage yields (total, used, free) in that order, matching the labels.
    labels = ("Total", "Used", "Free")
    return {label: convert_size(amount) for label, amount in zip(labels, usage)}
35
+
36
+ # ---
37
+
38
 
39
  def build_huggingface_path(language: str):
40
  return "poltextlab/xlm-roberta-large_ontolisst_v1"
 
43
  device = torch.device("cpu")
44
  model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto", offload_folder="offload", token=HF_TOKEN)
45
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
46
+
47
+ # --- DEBUG ---
48
+
49
+ disk_space = get_disk_space('/data/')
50
+ print("Disk Space Info:")
51
+ for key, value in disk_space.items():
52
+ print(f"{key}: {value}")
53
+
54
+ # ---
55
+
56
  model.to(device)
57
 
58
  inputs = tokenizer(text,