m-ric (HF staff) committed
Commit 7b3d61a · 1 Parent(s): 4cbfdf7

Working version

Files changed (1)
  1. app.py +5 -2
app.py CHANGED
@@ -25,9 +25,12 @@ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
 print(f"Loading model {model_id}...")
 
-model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, quantization_config=quantization_config, device_map="cuda", use_safetensors=True)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda", use_safetensors=True)
+# model.gradient_checkpointing_enable()
+
 attnlrp.register(model)
+
 print(f"Loaded model.")
 
 def really_clean_tokens(tokens):
@@ -77,7 +80,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
 attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
 
 ### FIND ZONES OF INTEREST
-threshold_per_token = 0.05
+threshold_per_token = 0.1
 kernel_width = 6
 context_width = 20 # Number of tokens to include as context on each side
 kernel = np.ones((kernel_width, kernel_width))
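In context, the new loading order looks roughly like the sketch below. The import lines and the LXT-style attnlrp / LlamaForCausalLM setup are assumptions (they are not shown in this diff); only the calls and arguments between "Loading model" and "Loaded model." come from the commit itself.

# Sketch of the post-commit loading sequence. Imports are assumptions based on
# the usual AttnLRP (LXT) usage pattern, not part of this diff.
import torch
from transformers import AutoTokenizer
from lxt.models.llama import LlamaForCausalLM, attnlrp

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

print(f"Loading model {model_id}...")

tokenizer = AutoTokenizer.from_pretrained(model_id)
# The quantization_config argument from the previous revision is dropped here:
# the model is now loaded directly in bfloat16 on the GPU.
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    use_safetensors=True,
)
# model.gradient_checkpointing_enable()  # kept commented out in the commit

attnlrp.register(model)  # register AttnLRP relevance rules on the model's modules
print(f"Loaded model.")

The second hunk only retunes process_relevances: threshold_per_token rises from 0.05 to 0.1, so the zone-of-interest filter keeps a region only if it carries roughly twice the per-token relevance previously required.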