Update README.md
Browse files
README.md
CHANGED
@@ -81,12 +81,19 @@ def calc_map_at_k(logits, pos_cnt, ks=[10,]):
|
|
81 |
pos_asm_cnt = 1
|
82 |
|
83 |
query = ["List all files in a directory"]
|
84 |
-
|
85 |
-
|
|
|
|
|
86 |
|
87 |
query_embs = text_encoder(**text_tokenizer(query))
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
# query_embs: [query_cnt, emb_dim]
|
92 |
# asm_embs: [pos_asm_cnt, emb_dim]
|
|
|
81 |
pos_asm_cnt = 1
|
82 |
|
83 |
query = ["List all files in a directory"]
|
84 |
+
|
85 |
+
# Assembly instruction listings extracted by the process_asm.py script mentioned above
|
86 |
+
anchor_asm = [ {"1": "endbr64", "2": "mov eax, 0" }, ... ]
|
87 |
+
neg_anchor_asm = [ {"1": "push rbp", "2": "mov rbp, rsp", ... }, ... ]
|
88 |
|
89 |
query_embs = text_encoder(**text_tokenizer(query))
|
90 |
+
|
91 |
+
kwargs = dict(padding=True, pad_to_multiple_of=8, return_tensors="pt")
|
92 |
+
anchor_asm_ids = asm_tokenizer.pad([asm_tokenizer(pos) for pos in anchor_asm], **kwargs)
|
93 |
+
neg_anchor_asm_ids = asm_tokenizer.pad([asm_tokenizer(neg) for neg in neg_anchor_asm], **kwargs)
|
94 |
+
|
95 |
+
asm_embs = asm_encoder(**anchor_asm_ids)
|
96 |
+
asm_neg_emb = asm_encoder(**neg_anchor_asm_ids)
|
97 |
|
98 |
# query_embs: [query_cnt, emb_dim]
|
99 |
# asm_embs: [pos_asm_cnt, emb_dim]
|