debug: disk space

interfaces/ontolisst.py  CHANGED  (+30 -0)
@@ -15,6 +15,26 @@ languages = [
 
 from label_dicts import ONTOLISST_LABEL_NAMES
 
+# --- DEBUG ---
+import shutil
+
+def convert_size(size):
+    for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB']:
+        if size < 1024:
+            return f"{size:.2f} {unit}"
+        size /= 1024
+
+def get_disk_space(path="/"):
+    total, used, free = shutil.disk_usage(path)
+
+    return {
+        "Total": convert_size(total),
+        "Used": convert_size(used),
+        "Free": convert_size(free)
+    }
+
+# ---
+
 
 def build_huggingface_path(language: str):
     return "poltextlab/xlm-roberta-large_ontolisst_v1"

@@ -23,6 +43,16 @@ def predict(text, model_id, tokenizer_id):
     device = torch.device("cpu")
     model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto", offload_folder="offload", token=HF_TOKEN)
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
+
+    # --- DEBUG ---
+
+    disk_space = get_disk_space('/data/')
+    print("Disk Space Info:")
+    for key, value in disk_space.items():
+        print(f"{key}: {value}")
+
+    # ---
+
     model.to(device)
 
     inputs = tokenizer(text,
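
For reference, a standalone sketch of what the added debug helpers do (not part of the commit): shutil.disk_usage returns raw byte counts, and convert_size divides by 1024 until the value fits a human-readable unit. The "/tmp" path below is only an illustration; the commit itself probes '/data/'.

import shutil

def convert_size(size):
    # Divide by 1024 until the value fits the current unit.
    for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB']:
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024

total, used, free = shutil.disk_usage("/tmp")  # raw byte counts
print(convert_size(free))                      # e.g. "12.34 GB"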