GeminiFan207 committed
Commit 10bfb12 · verified · 1 Parent(s): 80219c0

Update base_model.safetensors

Files changed (1)
  1. base_model.safetensors +48 -125
base_model.safetensors CHANGED
@@ -1,128 +1,51 @@
- from safetensors.torch import load_file, save_file
  import torch
- from typing import List, Dict, Optional
- import logging
- from tqdm import tqdm
- import os
- import hashlib
- from concurrent.futures import ThreadPoolExecutor, as_completed
-
- # Configure logging
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
- logger = logging.getLogger(__name__)
-
- def calculate_checksum(file_path: str) -> str:
-     """
-     Calculate the SHA-256 checksum of a file.
-
-     Args:
-         file_path (str): Path to the file.
-
-     Returns:
-         str: SHA-256 checksum of the file.
-     """
-     sha256 = hashlib.sha256()
-     with open(file_path, "rb") as f:
-         for chunk in iter(lambda: f.read(4096), b""):
-             sha256.update(chunk)
-     return sha256.hexdigest()
-
- def verify_checksums(model_parts: List[str], expected_checksums: List[str]) -> None:
-     """
-     Verify the checksums of model part files.
-
-     Args:
-         model_parts (list): List of model part file paths.
-         expected_checksums (list): List of expected checksums for each part.
-
-     Raises:
-         RuntimeError: If any checksum does not match.
-     """
-     for part, expected_checksum in zip(model_parts, expected_checksums):
-         actual_checksum = calculate_checksum(part)
-         if actual_checksum != expected_checksum:
-             raise RuntimeError(f"Checksum mismatch for {part}: expected {expected_checksum}, got {actual_checksum}")
-
- def load_part(part: str) -> Dict[str, torch.Tensor]:
-     """
-     Load a single model part.
-
-     Args:
-         part (str): Path to the model part file.
-
-     Returns:
-         dict: State dictionary of the model part.
-     """
-     return load_file(part)
-
- def load_charm_model(model_parts: List[str], expected_checksums: Optional[List[str]] = None) -> Dict[str, torch.Tensor]:
-     """
-     Load and merge multiple .safetensors model files.
-
-     Args:
-         model_parts (list): List of model part file paths (e.g., ["model-1-of-10.safetensors", ...]).
-         expected_checksums (list, optional): List of expected checksums for each part.
-
-     Returns:
-         dict: Merged model state dictionary.
-
-     Raises:
-         FileNotFoundError: If any model part file is missing.
-         RuntimeError: If there is an issue loading or merging the model parts.
-     """
-     merged_state_dict = {}
-
-     # Check if all model parts exist
-     for part in model_parts:
-         if not os.path.exists(part):
-             raise FileNotFoundError(f"Model part not found: {part}")
-
-     # Verify checksums if provided
-     if expected_checksums:
-         logger.info("Verifying checksums...")
-         verify_checksums(model_parts, expected_checksums)
-         logger.info("Checksums verified successfully.")
-
-     # Load and merge model parts in parallel
-     try:
-         logger.info("Loading and merging model parts...")
-         with ThreadPoolExecutor() as executor:
-             futures = {executor.submit(load_part, part): part for part in model_parts}
-             for future in tqdm(as_completed(futures), total=len(futures), desc="Loading model parts"):
-                 part = futures[future]
-                 try:
-                     state_dict = future.result()
-                     merged_state_dict.update(state_dict)  # Merge parameters
-                     logger.debug(f"Loaded part: {part}")
-                 except Exception as e:
-                     logger.error(f"Error loading part {part}: {e}")
-                     raise RuntimeError(f"Failed to load part: {part}")
-
-         logger.info("Model parts loaded and merged successfully.")
-         return merged_state_dict
-     except Exception as e:
-         logger.error(f"Error loading or merging model parts: {e}")
-         raise RuntimeError("Failed to load or merge model parts.")

  # Example usage
- if __name__ == "__main__":
-     try:
-         # List of model part files
-         model_files = [f"model-{i}-of-10.safetensors" for i in range(1, 11)]
-
-         # Optional: List of expected checksums for each part
-         expected_checksums = [
-             "checksum_for_model-1-of-10.safetensors",
-             "checksum_for_model-2-of-10.safetensors",
-             # Add checksums for all parts...
-         ]
-
-         # Load and merge the model
-         charm_model = load_charm_model(model_files, expected_checksums)
-
-         # Save the merged model as a .safetensors file
-         output_file = "merged_model.safetensors"
-         save_file(charm_model, output_file)
-         logger.info(f"Merged model saved as '{output_file}'.")
-     except Exception as e:
-         logger.error(f"An error occurred: {e}")
 
  import torch
+ from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+ from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+
+ # Specify the model name and safetensors file path
+ MODEL_NAME = "mistral-8x7B"
+ SAFETENSORS_PATH = "path_to_your_model.safetensors"
+
+ # Load the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+ # Initialize an empty (meta-device) model from the config, with no weights loaded yet
+ config = AutoConfig.from_pretrained(MODEL_NAME)
+ with init_empty_weights():
+     model = AutoModelForCausalLM.from_config(config)
+
+ # Use Hugging Face's `accelerate` to load the weights from the safetensors file efficiently;
+ # this allows for sharding and offloading to CPU/disk if needed
+ model = load_checkpoint_and_dispatch(
+     model,
+     SAFETENSORS_PATH,
+     device_map="auto",  # Automatically handles GPU/CPU placement and offloading
+     no_split_module_classes=["MistralDecoderLayer"],  # Layer classes that must not be split across devices
+     dtype=torch.float16,  # Use half precision for memory efficiency
+ )
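+ # If weights must spill to disk, load_checkpoint_and_dispatch also accepts an offload_folder argument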
+
+ # The model is already placed by device_map="auto"; `device` is only needed for the inputs below
+ device = "cuda" if torch.cuda.is_available() else "cpu"
 
  # Example usage
+ input_text = "Hello, how are you?"
+ inputs = tokenizer(input_text, return_tensors="pt").to(device)
+
+ # Generate output with efficient memory usage
+ with torch.no_grad():
+     outputs = model.generate(
+         inputs["input_ids"],
+         max_length=50,
+         num_return_sequences=1,
+         do_sample=True,  # Enable sampling so temperature/top_k/top_p take effect
+         temperature=0.7,
+         top_k=50,
+         top_p=0.95,
+     )
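+ # Note: max_length counts the prompt tokens as well; max_new_tokens can bound only the newly generated text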
+
+ # Decode and print the output
+ generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ print("Generated Text:", generated_text)