ZeppelinCorp
/

Charm_15

Text Generation

Mixture of Experts

text-generation-inference

673_trillion_parameters

Model card Files Files and versions Community

GeminiFan207 committed about 1 month ago

Commit

87cbc7b

·

verified ·

1 Parent(s): 09c4008

Create base_model.safetensors

Files changed (1) hide show

base_model.safetensors +67 -0

base_model.safetensors ADDED Viewed

	@@ -0,0 +1,67 @@

+import os
+import torch
+from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
+from safetensors.torch import save_file
+# Define model and output settings
+model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # Corrected to a real model
+output_dir = "mixtral_8x7b_safetensors"
+max_shard_size = 2 * 1024 * 1024 * 1024  # 2GB per shard in bytes
+dtype = torch.float16  # Half-precision to save space
+# Create output directory
+os.makedirs(output_dir, exist_ok=True)
+try:
+    # Load config and tokenizer first (low memory footprint)
+    config = AutoConfig.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # Save config and tokenizer for later use
+    config.save_pretrained(output_dir)
+    tokenizer.save_pretrained(output_dir)
+    # Load model with offloading to avoid OOM (if GPU available)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=dtype,
+        device_map="auto",  # Auto-distribute across GPU/CPU
+        low_cpu_mem_usage=True  # Reduce RAM usage during load
+    )
+    # Get state dict
+    state_dict = model.state_dict()
+    # Estimate total size and shard dynamically
+    total_size = sum(t.element_size() * t.nelement() for t in state_dict.values())
+    num_shards = max(1, int(total_size / max_shard_size) + 1)  # At least 1 shard
+    # Distribute parameters by size, not count
+    shards = [{} for _ in range(num_shards)]
+    current_size = [0] * num_shards
+    shard_index = 0
+    for key, value in state_dict.items():
+        tensor_size = value.element_size() * value.nelement()
+        # Move to next shard if current one exceeds size limit
+        while current_size[shard_index] + tensor_size > max_shard_size and shard_index < num_shards - 1:
+            shard_index += 1
+        shards[shard_index][key] = value
+        current_size[shard_index] += tensor_size
+    # Save each shard
+    for i, shard in enumerate(shards):
+        if shard:  # Only save non-empty shards
+            shard_path = os.path.join(output_dir, f"model_shard_{i}.safetensors")
+            save_file(shard, shard_path)
+            print(f"Saved shard {i} to {shard_path}")
+    print(f"Model saved to {output_dir} with {len([s for s in shards if s])} shards")
+except Exception as e:
+    print(f"Error occurred: {str(e)}")
+finally:
+    # Clean up memory
+    if 'model' in locals():
+        del model
+        torch.cuda.empty_cache()  # Clear GPU memory if used