Delete main.py
main.py
DELETED
@@ -1,80 +0,0 @@
-# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
-# Source for "Build a Large Language Model From Scratch"
-# https://github.com/rasbt/LLMs-from-scratch/blob/main/ch05/07_gpt_to_llama/standalone-llama32.ipynb
-
-import time
-import torch
-
-from model import Llama3Model, generate, text_to_token_ids, token_ids_to_text
-from tokenizer import Llama3Tokenizer, ChatFormat, clean_text
-
-#######################################
-# Model settings
-
-MODEL_FILE = "llama3.2-1B-instruct.pth"
-# MODEL_FILE = "llama3.2-1B-base.pth"
-# MODEL_FILE = "llama3.2-3B-instruct.pth"
-# MODEL_FILE = "llama3.2-3B-base.pth"
-
-MODEL_CONTEXT_LENGTH = 8192  # Supports up to 131_072
-
-# Text generation settings
-if "instruct" in MODEL_FILE:
-    PROMPT = "What do llamas eat?"
-else:
-    PROMPT = "Llamas eat"
-
-MAX_NEW_TOKENS = 150
-TEMPERATURE = 0.
-TOP_K = 1
-#######################################
-
-if "1B" in MODEL_FILE:
-    from model import LLAMA32_CONFIG_1B as LLAMA32_CONFIG
-elif "3B" in MODEL_FILE:
-    from model import LLAMA32_CONFIG_3B as LLAMA32_CONFIG
-else:
-    raise ValueError("Incorrect model file name")
-
-model = Llama3Model(LLAMA32_CONFIG)
-
-tokenizer = Llama3Tokenizer("tokenizer.model")
-
-if "instruct" in MODEL_FILE:
-    tokenizer = ChatFormat(tokenizer)
-
-model.load_state_dict(torch.load(MODEL_FILE, weights_only=True))
-
-device = (
-    torch.device("cuda") if torch.cuda.is_available() else
-    torch.device("mps") if torch.backends.mps.is_available() else
-    torch.device("cpu")
-)
-model.to(device)
-
-torch.manual_seed(123)
-
-start = time.time()
-
-token_ids = generate(
-    model=model,
-    idx=text_to_token_ids(PROMPT, tokenizer).to(device),
-    max_new_tokens=MAX_NEW_TOKENS,
-    context_size=LLAMA32_CONFIG["context_length"],
-    top_k=TOP_K,
-    temperature=TEMPERATURE
-)
-
-print(f"Time: {time.time() - start:.2f} sec")
-
-if torch.cuda.is_available():
-    max_mem_bytes = torch.cuda.max_memory_allocated()
-    max_mem_gb = max_mem_bytes / (1024 ** 3)
-    print(f"Max memory allocated: {max_mem_gb:.2f} GB")
-
-output_text = token_ids_to_text(token_ids, tokenizer)
-
-if "instruct" in MODEL_FILE:
-    output_text = clean_text(output_text)
-
-print("\n\nOutput text:\n\n", output_text)
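For context, the generate helper imported from model is not part of this diff. Throughout the book it is a plain token-by-token sampling loop along the following lines; this is a sketch based on the book's chapter 5 code, not necessarily the exact version the deleted file imported:

import torch

def generate(model, idx, max_new_tokens, context_size,
             temperature=0.0, top_k=None, eos_id=None):
    # idx is a (batch, n_tokens) tensor of token IDs for the current context
    for _ in range(max_new_tokens):
        idx_cond = idx[:, -context_size:]  # crop context to the supported length
        with torch.no_grad():
            logits = model(idx_cond)
        logits = logits[:, -1, :]  # keep only the last time step

        if top_k is not None:
            # Mask out (set to -inf) everything below the k-th largest logit
            top_logits, _ = torch.topk(logits, top_k)
            min_val = top_logits[:, -1]
            logits = torch.where(
                logits < min_val,
                torch.tensor(float("-inf")).to(logits.device),
                logits
            )

        if temperature > 0.0:
            # Temperature scaling followed by multinomial sampling
            logits = logits / temperature
            probs = torch.softmax(logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
        else:
            # Greedy decoding: pick the most likely token
            idx_next = torch.argmax(logits, dim=-1, keepdim=True)

        if eos_id is not None and idx_next == eos_id:
            break  # stop early if an end-of-sequence token is produced

        idx = torch.cat((idx, idx_next), dim=1)  # append the new token

    return idx

Note that with the settings in this file (TEMPERATURE = 0. and TOP_K = 1), the loop reduces to deterministic greedy decoding, so torch.manual_seed(123) does not affect the generated output.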
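Likewise, text_to_token_ids and token_ids_to_text are thin wrappers that add and remove the batch dimension around the tokenizer. A sketch matching the imports above, again based on the book's helpers rather than this repo's exact code:

import torch

def text_to_token_ids(text, tokenizer):
    encoded = tokenizer.encode(text)
    return torch.tensor(encoded).unsqueeze(0)  # add a batch dimension

def token_ids_to_text(token_ids, tokenizer):
    flat = token_ids.squeeze(0)  # remove the batch dimension
    return tokenizer.decode(flat.tolist())

When the tokenizer is wrapped in ChatFormat (the instruct case), encode additionally applies the Llama 3 chat prompt template, which is why the instruct branch passes the decoded output through clean_text to strip the template's special tokens from the response.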