pranavajay committed on
Commit
2135a4c
·
verified ·
1 Parent(s): 915a47b

Update rp.py

Browse files
Files changed (1) hide show
  1. rp.py +5 -8
rp.py CHANGED
@@ -12,16 +12,13 @@ def reduce_key_size(input_file, output_file, reduction_factor=0.50):
12
  # Calculate the new size
13
  new_size = int(original_tensor.size(0) * (1 - reduction_factor))
14
 
15
- # Resize the tensor (this could vary depending on your requirements)
16
- if new_size > 0: # Ensure new size is positive
17
  reduced_tensor = original_tensor[:new_size]
18
 
19
- # Convert to FP32 before quantizing
20
- fp32_tensor = reduced_tensor.to(torch.float32)
21
-
22
- # Quantize the tensor to 8-bit integer format (or FP8 if available)
23
- fp8_tensor = torch.quantize_per_tensor(fp32_tensor, scale=1.0, zero_point=0, dtype=torch.qint8)
24
- model_data[key] = fp8_tensor
25
 
26
  # Save the modified model
27
  save_file(model_data, output_file)
 
12
  # Calculate the new size
13
  new_size = int(original_tensor.size(0) * (1 - reduction_factor))
14
 
15
+ # Resize the tensor (ensure the new size is positive)
16
+ if new_size > 0:
17
  reduced_tensor = original_tensor[:new_size]
18
 
19
+ # Convert to FP16 precision (half-precision floating point)
20
+ fp16_tensor = reduced_tensor.to(torch.float16)
21
+ model_data[key] = fp16_tensor
 
 
 
22
 
23
  # Save the modified model
24
  save_file(model_data, output_file)