Update rp.py
rp.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 from safetensors.torch import load_file, save_file
 
-def reduce_key_size(input_file, output_file, reduction_factor=0.30):
+def reduce_key_size(input_file, output_file, reduction_factor=0.50):
     # Load the model
     model_data = load_file(input_file)
 
@@ -15,12 +15,17 @@ def reduce_key_size(input_file, output_file, reduction_factor=0.30):
         # Resize the tensor (this could vary depending on your requirements)
         if new_size > 0:  # Ensure new size is positive
             reduced_tensor = original_tensor[:new_size]
-
+
+            # Convert to FP8 (assuming your environment supports FP8)
+            # Note: PyTorch does not have built-in FP8 support; you may need to use a custom implementation
+            # Here's an example of converting a tensor to float16, then quantizing it
+            fp8_tensor = torch.quantize_per_tensor(reduced_tensor.to(torch.float16), scale=1.0, zero_point=0, dtype=torch.qint8)
+            model_data[key] = fp8_tensor
 
     # Save the modified model
     save_file(model_data, output_file)
 
 # Usage example
-input_file = '
-output_file = '
+input_file = 'merged_model_16.safetensors'  # Replace with your input model file
+output_file = 'merged_model_8.safetensors'  # Desired output file name
 reduce_key_size(input_file, output_file)
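
A note on the new quantization lines: torch.quantize_per_tensor expects a float32 input (it raises on a float16 tensor), and the qint8 tensor it returns is a quantized tensor rather than a plain one, so safetensors' save_file cannot serialize it. Contrary to the comment in the diff, PyTorch has shipped native 8-bit float dtypes (torch.float8_e4m3fn and torch.float8_e5m2) since version 2.1, and recent safetensors releases can store them directly. Below is a minimal sketch along those lines; it assumes PyTorch >= 2.1 and a recent safetensors, and the function name convert_to_fp8 is hypothetical, with the file names taken from the usage example above.

    import torch
    from safetensors.torch import load_file, save_file

    def convert_to_fp8(input_file, output_file):
        # Hypothetical helper, not part of the committed script.
        # Load all tensors from the safetensors checkpoint.
        model_data = load_file(input_file)
        for key, tensor in model_data.items():
            if tensor.is_floating_point():
                # Cast to 8-bit float (e4m3: 4 exponent bits, 3 mantissa bits).
                # This is a plain dtype cast, so save_file can serialize it,
                # unlike torch's quantized qint8 tensors.
                model_data[key] = tensor.to(torch.float8_e4m3fn)
        save_file(model_data, output_file)

    # File names follow the usage example in the diff above.
    convert_to_fp8('merged_model_16.safetensors', 'merged_model_8.safetensors')

Unlike the slicing in reduce_key_size (original_tensor[:new_size]), which truncates each tensor along its first dimension and so changes the model's weight shapes, a dtype cast preserves shapes, so the saved checkpoint can still be loaded into the original architecture.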