Update rp.py
rp.py CHANGED
@@ -16,10 +16,11 @@ def reduce_key_size(input_file, output_file, reduction_factor=0.50):
         if new_size > 0:  # Ensure new size is positive
             reduced_tensor = original_tensor[:new_size]
 
-            # Convert to
-
-
-
+            # Convert to FP32 before quantizing
+            fp32_tensor = reduced_tensor.to(torch.float32)
+
+            # Quantize the tensor to 8-bit integer format (or FP8 if available)
+            fp8_tensor = torch.quantize_per_tensor(fp32_tensor, scale=1.0, zero_point=0, dtype=torch.qint8)
             model_data[key] = fp8_tensor
 
     # Save the modified model
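For reference, a minimal, self-contained sketch (not part of the patch; the tensor values are made up) of what the torch.quantize_per_tensor(..., scale=1.0, zero_point=0, dtype=torch.qint8) call added above produces: each value is rounded to the nearest integer and clamped to the signed 8-bit range [-128, 127].

import torch

# Toy stand-in for reduced_tensor; the values are illustrative only.
fp32_tensor = torch.tensor([0.3, 1.7, -2.4, 200.0], dtype=torch.float32)

# Same call as in the patch: fixed scale of 1.0, zero point 0, signed 8-bit storage.
fp8_tensor = torch.quantize_per_tensor(fp32_tensor, scale=1.0, zero_point=0, dtype=torch.qint8)

print(fp8_tensor.int_repr())    # tensor([  0,   2,  -2, 127], dtype=torch.int8) -- rounded and clamped
print(fp8_tensor.dequantize())  # tensor([  0.,   2.,  -2., 127.]) -- recovered at integer precision

Note that with the scale fixed at 1.0, any value below 0.5 in magnitude quantizes to zero; schemes that derive the scale from the tensor's observed range preserve more precision, but that choice is left as-is in this change.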