"""Gradio app: upload a binary, pick a function, and classify its disassembly.

The binary is disassembled with the external ``bat-ana`` / ``bat-dis``
command-line tools, and the selected function's text is fed to a Hugging Face
``transformers`` pipeline whose top-2 labels are shown next to the
disassembly.
"""

import os
import re
import subprocess
import tempfile

import gradio as gr
import numpy
import transformers

# Print the installed NumPy version at startup.
print(numpy.__version__)

# Classification pipeline; returns the two highest-scoring labels per input.
pipe = transformers.pipeline(model="ejschwartz/oo-method-test-model-bylibrary", top_k=2)

# NOTE: Earlier prototype that served the model directly, kept for its example
# inputs, which show the disassembly text format fed to the model.
#
# gr.load("ejschwartz/oo-method-test-model-bylibrary", src="models", examples=[
#     '\nL1: 55 ?? push ebp\n 8b ec ?? mov ebp, esp\n 5d ?? pop ebp\n c3 ?? ret\n\n',
#     '\nL1: 55 ?? push ebp\n 8b ec ?? mov ebp, esp\n 51 ?? push ecx\n a1 b0 5d 43 00 ?? mov eax, dword ds:[0x00435db0]\n 83 f8 fe ?? cmp eax, 0xfe<254,-2>\n 75 0a ?? jne basic block L4\n\nL2: e8 4e 17 00 00 ?? call function 0x00415d25\n\nL3: a1 b0 5d 43 00 ?? mov eax, dword ds:[0x00435db0]\n\nL4: 83 f8 ff ?? cmp eax, 0xff<255,-1>\n 75 07 ?? jne basic block L6\n\nL5: b8 ff ff 00 00 ?? mov eax, 0x0000ffff\n eb 1b ?? jmp basic block L9\n\nL6: 6a 00 ?? push 0\n 8d 4d fc ?? lea ecx, ss:[ebp + 0xfc<252,-4>]\n 51 ?? push ecx\n 6a 01 ?? push 1\n 8d 4d 08 ?? lea ecx, ss:[ebp + 8]\n 51 ?? push ecx\n 50 ?? push eax\n ff 15 28 91 43 00 ?? call dword ds:[0x00439128]\n\nL7: 85 c0 ?? test eax, eax\n 74 e2 ?? je basic block L5\n\nL8: 66 8b 45 08 ?? mov ax, word ss:[ebp + 8]\n\nL9: 8b e5 ?? mov esp, ebp\n 5d ?? pop ebp\n c3 ?? ret\n\n',
#     '\nL1: 0f b7 41 32 ?? movzx eax, word ds:[ecx + 0x32<50>]\n 83 e8 20 ?? sub eax, 0x20<32>\n 74 2d ?? je basic block L10\n\nL2: 83 e8 03 ?? sub eax, 3\n 74 22 ?? je basic block L9\n\nL3: 83 e8 08 ?? sub eax, 8\n 74 17 ?? je basic block L8\n\nL4: 48 ?? dec eax\n 83 e8 01 ?? sub eax, 1\n 74 0b ?? je basic block L7\n\nL5: 83 e8 03 ?? sub eax, 3\n 75 1c ?? jne basic block L11\n\nL6: 83 49 20 08 ?? or dword ds:[ecx + 0x20<32>], 8\n eb 16 ?? jmp basic block L11\n\nL7: 83 49 20 04 ?? or dword ds:[ecx + 0x20<32>], 4\n eb 10 ?? jmp basic block L11\n\nL8: 83 49 20 01 ?? or dword ds:[ecx + 0x20<32>], 1\n eb 0a ?? jmp basic block L11\n\nL9: 83 49 20 20 ?? or dword ds:[ecx + 0x20<32>], 0x20<32>\n eb 04 ?? jmp basic block L11\n\nL10: 83 49 20 02 ?? or dword ds:[ecx + 0x20<32>], 2\n\nL11: b0 01 ?? mov al, 1\n c3 ?? ret\n\n',
#     "\nL1: 8b ff ?? mov edi, edi\n 55 ?? push ebp\n 8b ec ?? mov ebp, esp\n 83 ec 08 ?? sub esp, 8\n 89 4d f8 ?? mov dword ss:[ebp + 0xf8<248,-8>], ecx\n 8b 4d f8 ?? mov ecx, dword ss:[ebp + 0xf8<248,-8>]\n e8 e6 ac f9 ff ?? call function 0x00401569\n\nL2: 23 45 08 ?? and eax, dword ss:[ebp + 8]\n 3b 45 08 ?? cmp eax, dword ss:[ebp + 8]\n 75 09 ?? jne basic block L4\n\nL3: c7 45 fc 01 00 00 00 ?? mov dword ss:[ebp + 0xfc<252,-4>], 1\n eb 07 ?? jmp basic block L5\n\nL4: c7 45 fc 00 00 00 00 ?? mov dword ss:[ebp + 0xfc<252,-4>], 0\n\nL5: 8a 45 fc ?? mov al, byte ss:[ebp + 0xfc<252,-4>]\n 8b e5 ?? mov esp, ebp\n 5d ?? pop ebp\n c2 04 00 ?? ret 4\n\n"
# ],
# live=True, title="Is it a method or a function?").launch(server_name="0.0.0.0", server_port=7860)


def _record_function(func_dis, addr, lines):
    """Store one function's disassembly, keeping the first entry per address."""
    if addr is None:
        # Nothing to store: no function header has been seen yet.
        return
    if addr in func_dis:
        print("Warning: Ignoring multiple functions at the same address")
        return
    func_dis[addr] = b"\n".join(lines)


def _split_functions(output):
    """Split ``bat-dis`` output into a ``{address: disassembly bytes}`` dict.

    A line starting with ``;;; function 0x`` begins a new function whose
    address is the third whitespace-separated token, parsed as hex.  Lines
    containing ``;;`` are dropped; all other lines belong to the most recent
    function header.  Lines before the first header are discarded.
    """
    func_dis = {}
    last_func = None
    current_output = []

    for line in output.splitlines():
        if line.startswith(b";;; function 0x"):
            _record_function(func_dis, last_func, current_output)
            last_func = int(line.split()[2], 16)
            # Start a fresh list; the previous one has already been joined.
            current_output = []

        if b";;" not in line:
            current_output.append(line)

    _record_function(func_dis, last_func, current_output)
    return func_dis


def get_all_dis(bname, addrs=None):
    """Disassemble the binary at *bname* and return its functions.

    Runs ``bat-ana --no-post-analysis`` on the binary, then ``bat-dis`` on the
    analysis file it was told to write, and splits the listing per function.

    Args:
        bname: Path of the binary to analyze.
        addrs: Optional iterable of addresses; each is passed to ``bat-ana``
            as a ``--function-at`` argument.

    Returns:
        A dict mapping each function's integer address to its disassembly as
        ``bytes``, with runs of spaces collapsed to a single space.  If the
        same address is reported more than once, the first occurrence is kept
        and a warning is printed.

    Raises:
        subprocess.CalledProcessError: If either tool exits with a non-zero
            status.
        FileNotFoundError: If either tool is not installed.
    """
    # Arguments are passed as lists (no shell) so that a path or address
    # containing spaces or shell metacharacters cannot alter the command.
    ana_cmd = ["bat-ana"]
    if addrs is not None:
        for addr in addrs:
            ana_cmd += ["--function-at", str(addr)]

    # A private temporary directory holds the intermediate analysis file and
    # is removed as soon as the disassembly has been read.
    with tempfile.TemporaryDirectory() as tmpdir:
        ananame = os.path.join(tmpdir, os.path.basename(bname) + ".bat_ana")
        ana_cmd += ["--no-post-analysis", "-o", ananame, bname]
        subprocess.check_output(ana_cmd, stderr=subprocess.DEVNULL)

        output = subprocess.check_output(
            ["bat-dis", "--no-insn-address", "--no-bb-cfg-arrows", "--color=off", ananame],
            stderr=subprocess.DEVNULL,
        )

    output = re.sub(b" +", b" ", output)
    return _split_functions(output)


def get_funs(f):
    """Return the function addresses found in file object *f*.

    Args:
        f: Object whose ``name`` attribute is the path of the binary.

    Returns:
        The addresses formatted as ``0x``-prefixed hex, one per line.
    """
    funs = get_all_dis(f.name)
    return "\n".join(f"{addr:#x}" for addr in funs)


with gr.Blocks() as demo:

    # Per-session storage for the {address: disassembly} dict of the upload.
    all_dis_state = gr.State()

    gr.Markdown(
        """
        # Function/Method Detector

        First, upload a binary.  Then, select a function from the dropdown.  The function's disassembly will be displayed below.
        """
    )

    file_widget = gr.File(label="Binary file")

    # Hidden until a file has been uploaded and disassembled.
    with gr.Column(visible=False) as col:
        fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)

        with gr.Row(visible=True) as result:
            disassembly = gr.Textbox(label="Disassembly", lines=20)
            clazz = gr.Label()

    def file_change_fn(file):
        """Disassemble the uploaded file and populate the function dropdown.

        Hides the function column and clears the stored disassembly when the
        file is removed.
        """
        if file is None:
            return {col: gr.update(visible=False), all_dis_state: None}

        fun_data = get_all_dis(file.name)
        addrs = [f"{addr:#x}" for addr in fun_data]

        return {
            col: gr.update(visible=True),
            # A binary may yield no functions; leave the dropdown empty then
            # instead of failing on addrs[0].
            fun_dropdown: gr.update(choices=addrs, value=addrs[0] if addrs else None),
            all_dis_state: fun_data,
        }

    def function_change_fn(selected_fun, fun_data):
        """Show the selected function's disassembly and the model's labels.

        Clears both outputs when nothing is selected, no disassembly is
        stored, or the selection does not match a stored address.
        """
        cleared = {disassembly: gr.update(value=""), clazz: gr.update(value=None)}
        if not selected_fun or not fun_data:
            return cleared

        try:
            raw = fun_data[int(selected_fun, 16)]
        except (KeyError, ValueError):
            return cleared

        disassembly_str = raw.decode("utf-8", errors="replace")
        model_results = pipe(disassembly_str)

        # Keep only the part of each label before the first ", ".
        top_k = {i["label"].split(", ")[0]: i["score"] for i in model_results}
        return {disassembly: gr.update(value=disassembly_str), clazz: gr.update(value=top_k)}

    file_widget.change(file_change_fn, file_widget, [col, fun_dropdown, all_dis_state])

    fun_dropdown.change(function_change_fn, [fun_dropdown, all_dis_state], [disassembly, clazz])

demo.launch(server_name="0.0.0.0", server_port=7860, share=True)