Spaces:
Running
Running
kovacsvi
committed on
Commit
·
8453705
1
Parent(s):
7cbaea3
delete unused model weights (before JIT)
Browse files
utils.py
CHANGED
@@ -63,27 +63,26 @@ for domain in domains_illframes.values():
|
|
63 |
tokenizers = ["xlm-roberta-large"]
|
64 |
|
65 |
def download_hf_models():
|
66 |
-
# Ensure the JIT model directory exists
|
67 |
os.makedirs(JIT_DIR, exist_ok=True)
|
68 |
|
69 |
for model_id in models:
|
70 |
print(f"Downloading + JIT tracing model: {model_id}")
|
71 |
|
72 |
-
# Load model and tokenizer
|
73 |
-
model = AutoModelForSequenceClassification.from_pretrained(
|
74 |
-
model_id,
|
75 |
-
token=HF_TOKEN,
|
76 |
-
device_map="auto"
|
77 |
-
)
|
78 |
-
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")
|
79 |
-
|
80 |
safe_model_name = model_id.replace("/", "_")
|
81 |
traced_model_path = os.path.join(JIT_DIR, f"{safe_model_name}.pt")
|
82 |
|
83 |
if os.path.exists(traced_model_path):
|
|
|
84 |
print(f"⏩ Skipping JIT — already exists: {traced_model_path}")
|
85 |
else:
|
86 |
print(f"⚙️ Tracing and saving: {traced_model_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
model.eval()
|
89 |
|
@@ -116,6 +115,15 @@ def df_h():
|
|
116 |
print("=== Disk Usage for /data/ (du -h --max-depth=2) ===")
|
117 |
print(du_result.stdout)
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
def delete_http_folders():
|
121 |
http_folders = glob.glob("/data/http*")
|
|
|
63 |
tokenizers = ["xlm-roberta-large"]
|
64 |
|
65 |
def download_hf_models():
|
|
|
66 |
os.makedirs(JIT_DIR, exist_ok=True)
|
67 |
|
68 |
for model_id in models:
|
69 |
print(f"Downloading + JIT tracing model: {model_id}")
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
safe_model_name = model_id.replace("/", "_")
|
72 |
traced_model_path = os.path.join(JIT_DIR, f"{safe_model_name}.pt")
|
73 |
|
74 |
if os.path.exists(traced_model_path):
|
75 |
+
delete_unused_bin_files(model_id)
|
76 |
print(f"⏩ Skipping JIT — already exists: {traced_model_path}")
|
77 |
else:
|
78 |
print(f"⚙️ Tracing and saving: {traced_model_path}")
|
79 |
+
|
80 |
+
model = AutoModelForSequenceClassification.from_pretrained(
|
81 |
+
model_id,
|
82 |
+
token=HF_TOKEN,
|
83 |
+
device_map="auto"
|
84 |
+
)
|
85 |
+
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")
|
86 |
|
87 |
model.eval()
|
88 |
|
|
|
115 |
print("=== Disk Usage for /data/ (du -h --max-depth=2) ===")
|
116 |
print(du_result.stdout)
|
117 |
|
118 |
+
|
119 |
+
def delete_unused_bin_files(model_id: str, cache_root: str = "/data") -> None:
    """Delete every ``*.bin`` file stored under a model's cache folder.

    The folder searched is ``<cache_root>/models--<org>--<name>``.

    Args:
        model_id: Either a full repo id (``"org/name"``) or a bare model
            name. A bare name is looked up under the ``poltextlab``
            organisation, as before. A full repo id has its ``/`` mapped to
            ``--`` so the organisation prefix is not duplicated and no path
            separator ends up inside the folder name.
        cache_root: Directory that contains the ``models--*`` folders.
            Defaults to ``/data``, the location that was previously
            hard-coded.

    Returns:
        None. Each removed path is printed before it is deleted.
    """
    if "/" in model_id:
        # "org/name" -> "models--org--name". Prepending "poltextlab--" to a
        # namespaced id would produce a path that matches nothing.
        repo_folder = "models--" + model_id.replace("/", "--")
    else:
        repo_folder = f"models--poltextlab--{model_id}"

    target_path = os.path.join(cache_root, repo_folder)

    # Escape the directory part so glob metacharacters in the root or the
    # model name are matched literally; only "**/*.bin" acts as a pattern.
    pattern = os.path.join(glob.escape(target_path), "**", "*.bin")

    # NOTE(review): if the cache stores snapshot files as symlinks into a
    # blobs/ directory, removing the link alone may not free the underlying
    # blob -- confirm against the cache layout actually used on this volume.
    for file_path in glob.glob(pattern, recursive=True):
        if not os.path.isfile(file_path):
            # Skip directories (and dangling links) that end in ".bin".
            continue
        print(f"Deleting: {file_path}")
        os.remove(file_path)
|
126 |
+
|
127 |
|
128 |
def delete_http_folders():
|
129 |
http_folders = glob.glob("/data/http*")
|