Commit 9019f12 · 1 Parent(s): 47abc2a

debug tokenizer not found
app.py CHANGED
@@ -2,6 +2,42 @@
 import gradio as gr
 
 # ==================================================================================-
+# inference code part-1
+# -------------------------------------------------
+# 1. Download the model weights (from huggingface)
+# -------------------------------------------------
+from huggingface_hub import hf_hub_download
+hf_hub_download(repo_id="Aananda-giri/LLAMA3-Nepali", filename="parameters_300m/model_pg_398000_steps.pth", local_dir="./")
+
+# ----------------------
+# 2. Load the tokenizer
+# ----------------------
+from transformers import PreTrainedTokenizerFast
+
+# Load the tokenizer
+tokenizer = PreTrainedTokenizerFast.from_pretrained("Aananda-giri/LLAMA3-Nepali")
+tokenizer.save_pretrained("NepaliBPE")
+
+
+# Llama 3.2 ~300M scaled version
+LLAMA32_CONFIG = {
+    "vocab_size": 50006,       # reduced from 128_256; len(tokenizer.tokenizer) = 50006
+    "context_length": 512,     # reduced from 131_072 (unrelated to model size, but a higher context length consumes more RAM)
+    "emb_dim": 1320,           # embedding dimension, reduced from 2048
+    "n_heads": 20,             # number of attention heads, reduced from 32
+    "n_layers": 10,            # number of layers, reduced from 16
+    "hidden_dim": 5280,        # size of the intermediate dimension in FeedForward, reduced from 8192
+    "n_kv_groups": 5,          # key-value groups for grouped-query attention, reduced from 8
+    "rope_base": 500_000.0,    # the base in RoPE's "theta"
+    "dtype": torch.bfloat16,   # lower-precision dtype to reduce memory usage
+    "rope_freq": {             # RoPE frequency scaling
+        "factor": 32.0,
+        "low_freq_factor": 1.0,
+        "high_freq_factor": 4.0,
+        "original_context_length": 8192,
+    },
+}
+
 # ==================================================================================-
 # ==================================================================================-
 # ==================================================================================-
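This hunk moves the download-and-load block to the top of app.py, so `tokenizer` is defined before anything below references it. A quick sanity check for the relocated block, run from the same directory (a minimal sketch, assuming Hub access from the Space; the expected vocabulary size comes from `LLAMA32_CONFIG` above):

    from transformers import PreTrainedTokenizerFast

    # If this load fails, the "tokenizer not found" symptom is a download or
    # loading problem rather than an inference bug.
    tokenizer = PreTrainedTokenizerFast.from_pretrained("Aananda-giri/LLAMA3-Nepali")
    print(len(tokenizer))          # expected to match LLAMA32_CONFIG["vocab_size"] = 50006

    ids = tokenizer.encode("नेपाल विद्युत प्राधिकरण")
    print(tokenizer.decode(ids))   # should round-trip the Nepali input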
@@ -435,7 +471,7 @@ class ChatFormat:
         return self.tokenizer.decode(token_ids)
 
 # tokenizer = Tokenizer("tokenizer.json")
-chat_tokenizer = ChatFormat(tokenizer)
+# chat_tokenizer = ChatFormat(tokenizer)
 
 # text = "नेपाल विद्युत प्राधिकरणका कार्यकारी निर्देशक कुलमान घिसिङले माथिल्लो अरुण जलविद्युत आयोजना विश्व बैंक र एडीबीबाट वित्तीय व्यवस्थापन नभए नेपाली जनताको लगानीमा बनाउने तयारी रहेको बताएका छन् ।"
 # # normal tokenizer
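This change is the likely fix target of the commit: in the old layout, `chat_tokenizer = ChatFormat(tokenizer)` executed at import time at old line 438, while `tokenizer` was only assigned around old line 885 (see the next hunk), so importing app.py presumably raised `NameError: name 'tokenizer' is not defined`, the "tokenizer not found" of the commit message. With the loading block now at the top of the file, the wrapper could later be re-enabled:

    # Safe once `tokenizer` is assigned near the top of the module; this commit
    # keeps it commented out while debugging. ChatFormat is the class above.
    chat_tokenizer = ChatFormat(tokenizer)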
@@ -870,40 +906,7 @@ def generate_chat_optimized(
 # generate_and_print_sample
 # )
 
-# -------------------------------------------------
-# 1. Download the model weights (from huggingface)
-# -------------------------------------------------
-from huggingface_hub import hf_hub_download
-hf_hub_download(repo_id="Aananda-giri/LLAMA3-Nepali", filename="parameters_300m/model_pg_398000_steps.pth", local_dir="./")
 
-# ----------------------
-# 2. Load the tokenizer
-# ----------------------
-from transformers import PreTrainedTokenizerFast
-
-# Load the tokenizer
-tokenizer = PreTrainedTokenizerFast.from_pretrained("Aananda-giri/LLAMA3-Nepali")
-tokenizer.save_pretrained("NepaliBPE")
-
-
-# Llama 3.2 ~300M scaled version
-LLAMA32_CONFIG = {
-    "vocab_size": 50006,       # reduced from 128_256; len(tokenizer.tokenizer) = 50006
-    "context_length": 512,     # reduced from 131_072 (unrelated to model size, but a higher context length consumes more RAM)
-    "emb_dim": 1320,           # embedding dimension, reduced from 2048
-    "n_heads": 20,             # number of attention heads, reduced from 32
-    "n_layers": 10,            # number of layers, reduced from 16
-    "hidden_dim": 5280,        # size of the intermediate dimension in FeedForward, reduced from 8192
-    "n_kv_groups": 5,          # key-value groups for grouped-query attention, reduced from 8
-    "rope_base": 500_000.0,    # the base in RoPE's "theta"
-    "dtype": torch.bfloat16,   # lower-precision dtype to reduce memory usage
-    "rope_freq": {             # RoPE frequency scaling
-        "factor": 32.0,
-        "low_freq_factor": 1.0,
-        "high_freq_factor": 4.0,
-        "original_context_length": 8192,
-    },
-}
 
 old_context_length = 131_072                           # original context length of the Llama 3.2 model
 new_context_length = LLAMA32_CONFIG["context_length"]  # 512, our new context length
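The trailing context lines suggest app.py next rescales the RoPE base for the shorter context window. A minimal sketch of that step, assuming the linear theta rescaling commonly paired with this config style (the helper name `rescale_theta` is an assumption, not shown in the diff):

    def rescale_theta(theta_old, context_length_old, context_length_new):
        # Linear rescaling: shrinking the context window by a factor k
        # shrinks RoPE's theta base by the same factor k.
        return theta_old * (context_length_new / context_length_old)

    old_context_length = 131_072
    new_context_length = LLAMA32_CONFIG["context_length"]  # 512

    LLAMA32_CONFIG["rope_base"] = rescale_theta(
        LLAMA32_CONFIG["rope_base"], old_context_length, new_context_length
    )
    print(LLAMA32_CONFIG["rope_base"])  # 500_000.0 * 512 / 131_072 = 1953.125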
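For completeness: after this config block, the downloaded checkpoint still has to be loaded into a model instance. A hedged sketch of that step; the class name `Llama32Model` is an assumption (the diff does not show the model definition), and the .pth file is assumed to hold a plain state dict:

    import torch

    # map_location="cpu" keeps this working on CPU-only Spaces hardware;
    # weights_only=True avoids unpickling arbitrary objects.
    state_dict = torch.load(
        "parameters_300m/model_pg_398000_steps.pth",
        map_location="cpu",
        weights_only=True,
    )
    model = Llama32Model(LLAMA32_CONFIG)  # hypothetical class defined elsewhere in app.py
    model.load_state_dict(state_dict)     # assumes the checkpoint is a raw state dict
    model.eval()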
|