elsagranger committed on
Commit
37d5164
·
verified ·
1 Parent(s): acc802b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +11 -4
README.md CHANGED
@@ -81,12 +81,19 @@ def calc_map_at_k(logits, pos_cnt, ks=[10,]):
81
  pos_asm_cnt = 1
82
 
83
  query = ["List all files in a directory"]
84
- anchor_asm = [...]
85
- neg_anchor_asm = [...]
 
 
86
 
87
  query_embs = text_encoder(**text_tokenizer(query))
88
- asm_embs = asm_encoder(**asm_tokenizer(anchor_asm))
89
- asm_neg_emb = asm_encoder(**asm_tokenizer(neg_anchor_asm))
 
 
 
 
 
90
 
91
  # query_embs: [query_cnt, emb_dim]
92
  # asm_embs: [pos_asm_cnt, emb_dim]
 
81
  pos_asm_cnt = 1
82
 
83
  query = ["List all files in a directory"]
84
+
85
+ # Extracted by the process_asm.py script mentioned above
86
+ anchor_asm = [ {"1": "endbr64", "2": "mov eax, 0" }, ... ]
87
+ neg_anchor_asm = [ {"1": "push rbp", "2": "mov rbp, rsp", ... }, ... ]
88
 
89
  query_embs = text_encoder(**text_tokenizer(query))
90
+
91
+ kwargs = dict(padding=True, pad_to_multiple_of=8, return_tensors="pt")
92
+ anchor_asm_ids = asm_tokenizer.pad([asm_tokenizer(pos) for pos in anchor_asm], **kwargs)
93
+ neg_anchor_asm_ids = asm_tokenizer.pad([asm_tokenizer(neg) for neg in neg_anchor_asm], **kwargs)
94
+
95
+ asm_embs = asm_encoder(**anchor_asm_ids)
96
+ asm_neg_emb = asm_encoder(**neg_anchor_asm_ids)
97
 
98
  # query_embs: [query_cnt, emb_dim]
99
  # asm_embs: [pos_asm_cnt, emb_dim]