pranavajay
/

mk

pranavajay committed on Sep 27, 2024

Commit

2135a4c

verified ·

1 Parent(s): 915a47b

Update rp.py

Files changed (1) hide show

rp.py CHANGED Viewed

@@ -12,16 +12,13 @@ def reduce_key_size(input_file, output_file, reduction_factor=0.50):
         # Calculate the new size
         new_size = int(original_tensor.size(0) * (1 - reduction_factor))
-        # Resize the tensor (this could vary depending on your requirements)
-        if new_size > 0:  # Ensure new size is positive
             reduced_tensor = original_tensor[:new_size]
-            # Convert to FP32 before quantizing
-            fp32_tensor = reduced_tensor.to(torch.float32)
-            # Quantize the tensor to 8-bit integer format (or FP8 if available)
-            fp8_tensor = torch.quantize_per_tensor(fp32_tensor, scale=1.0, zero_point=0, dtype=torch.qint8)
-            model_data[key] = fp8_tensor
     # Save the modified model
     save_file(model_data, output_file)

         # Calculate the new size
         new_size = int(original_tensor.size(0) * (1 - reduction_factor))
+        # Resize the tensor (ensure the new size is positive)
+        if new_size > 0:
             reduced_tensor = original_tensor[:new_size]
+            # Convert to FP16 precision (half-precision floating point)
+            fp16_tensor = reduced_tensor.to(torch.float16)
+            model_data[key] = fp16_tensor
     # Save the modified model
     save_file(model_data, output_file)