Update rp.py
Browse files
rp.py
CHANGED
@@ -12,16 +12,13 @@ def reduce_key_size(input_file, output_file, reduction_factor=0.50):
|
|
12 |
# Calculate the new size
|
13 |
new_size = int(original_tensor.size(0) * (1 - reduction_factor))
|
14 |
|
15 |
-
# Resize the tensor (ensure the new size is positive)
|
16 |
-
if new_size > 0:
|
17 |
reduced_tensor = original_tensor[:new_size]
|
18 |
|
19 |
-
# Convert to FP32 precision (single-precision floating point)
|
20 |
-
|
21 |
-
|
22 |
-
# Quantize the tensor to 8-bit integer format (or FP8 if available)
|
23 |
-
fp8_tensor = torch.quantize_per_tensor(fp32_tensor, scale=1.0, zero_point=0, dtype=torch.qint8)
|
24 |
-
model_data[key] = fp8_tensor
|
25 |
|
26 |
# Save the modified model
|
27 |
save_file(model_data, output_file)
|
|
|
12 |
# Calculate the new size
|
13 |
new_size = int(original_tensor.size(0) * (1 - reduction_factor))
|
14 |
|
15 |
+
# Resize the tensor (ensure the new size is positive)
|
16 |
+
if new_size > 0:
|
17 |
reduced_tensor = original_tensor[:new_size]
|
18 |
|
19 |
+
# Convert to FP16 precision (half-precision floating point)
|
20 |
+
fp16_tensor = reduced_tensor.to(torch.float16)
|
21 |
+
model_data[key] = fp16_tensor
|
|
|
|
|
|
|
22 |
|
23 |
# Save the modified model
|
24 |
save_file(model_data, output_file)
|