Spaces:
Runtime error
Runtime error
Working version
Browse files
app.py
CHANGED
@@ -25,9 +25,12 @@ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
|
|
25 |
|
26 |
print(f"Loading model {model_id}...")
|
27 |
|
28 |
-
model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, quantization_config=quantization_config, device_map="cuda", use_safetensors=True)
|
29 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
|
|
|
|
30 |
attnlrp.register(model)
|
|
|
31 |
print(f"Loaded model.")
|
32 |
|
33 |
def really_clean_tokens(tokens):
|
@@ -77,7 +80,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
77 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
78 |
|
79 |
### FIND ZONES OF INTEREST
|
80 |
-
threshold_per_token = 0.
|
81 |
kernel_width = 6
|
82 |
context_width = 20 # Number of tokens to include as context on each side
|
83 |
kernel = np.ones((kernel_width, kernel_width))
|
|
|
25 |
|
26 |
print(f"Loading model {model_id}...")
|
27 |
|
|
|
28 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
29 |
+
model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda", use_safetensors=True)
|
30 |
+
# model.gradient_checkpointing_enable()
|
31 |
+
|
32 |
attnlrp.register(model)
|
33 |
+
|
34 |
print(f"Loaded model.")
|
35 |
|
36 |
def really_clean_tokens(tokens):
|
|
|
80 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
81 |
|
82 |
### FIND ZONES OF INTEREST
|
83 |
+
threshold_per_token = 0.1
|
84 |
kernel_width = 6
|
85 |
context_width = 20 # Number of tokens to include as context on each side
|
86 |
kernel = np.ones((kernel_width, kernel_width))
|