pranavajay committed on
Commit
915a47b
·
verified ·
1 Parent(s): 590e9a6

Update rp.py

Browse files
Files changed (1) hide show
  1. rp.py +5 -4
rp.py CHANGED
@@ -16,10 +16,11 @@ def reduce_key_size(input_file, output_file, reduction_factor=0.50):
16
  if new_size > 0: # Ensure new size is positive
17
  reduced_tensor = original_tensor[:new_size]
18
 
19
- # Convert to FP8 (assuming your environment supports FP8)
20
- # Note: PyTorch does not have built-in FP8 support; you may need to use a custom implementation
21
- # Here's an example of converting a tensor to float16, then quantizing it
22
- fp8_tensor = torch.quantize_per_tensor(reduced_tensor.to(torch.float16), scale=1.0, zero_point=0, dtype=torch.qint8)
 
23
  model_data[key] = fp8_tensor
24
 
25
  # Save the modified model
 
16
  if new_size > 0: # Ensure new size is positive
17
  reduced_tensor = original_tensor[:new_size]
18
 
19
+ # Convert to FP32 before quantizing
20
+ fp32_tensor = reduced_tensor.to(torch.float32)
21
+
22
+ # Quantize the tensor to 8-bit integer format (or FP8 if available)
23
+ fp8_tensor = torch.quantize_per_tensor(fp32_tensor, scale=1.0, zero_point=0, dtype=torch.qint8)
24
  model_data[key] = fp8_tensor
25
 
26
  # Save the modified model