Spaces:
Running
Running
File size: 5,799 Bytes
04e6f1a d717efb 80dd465 5ed5cfe 8464e89 04e6f1a f154634 8464e89 d717efb 8464e89 d717efb e92c772 f154634 e92c772 2fba160 e92c772 bbc4bc2 8464e89 d717efb f154634 d717efb f154634 d717efb 5fcff16 d717efb f154634 f4329cf b8dc8f6 d717efb a41bb55 d717efb 8464e89 f154634 8464e89 f154634 2fba160 f154634 8464e89 f154634 d717efb f154634 8464e89 53bcd0f 5f7c7b0 f154634 53bcd0f 5f7c7b0 f154634 53bcd0f f154634 5f7c7b0 f154634 8464e89 53bcd0f 5fcff16 d717efb f154634 d717efb 8464e89 5fcff16 f154634 8464e89 f154634 d717efb f154634 1bc7f4c f154634 1bc7f4c f154634 d717efb f154634 1bc7f4c f154634 1bc7f4c f154634 1bc7f4c f154634 1bc7f4c f154634 1bc7f4c f154634 1bc7f4c f154634 5f7c7b0 f154634 5fcff16 f154634 d717efb 80dd465 d717efb 6de58c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import gradio as gr
import subprocess
import tempfile
import os
import sys
import json
def get_functions(file):
    """Run Ghidra headless on ``file`` and return the dumped function table.

    The binary is imported into a throwaway Ghidra project inside a temporary
    directory, and the ``dump_functions.py`` post-script is given the path of
    a ``funcs.json`` file in that same directory to write its result to.

    Args:
        file: Path of the executable to analyze.

    Returns:
        The parsed contents of ``funcs.json``. The schema is whatever the
        post-script writes; it is not defined in this function.

    Raises:
        gr.Error: If Ghidra cannot be started or exits with an error, or if
            no ``funcs.json`` was produced.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        funcs_path = os.path.join(tmp_dir, "funcs.json")
        # Pass an argument list instead of a shell string: the path may
        # contain spaces or shell metacharacters, which would otherwise split
        # the command or be interpreted by the shell.
        cmd = [
            "/ghidra/support/analyzeHeadless",
            tmp_dir,
            "Project",
            "-import",
            str(file),
            "-postscript",
            "/home/user/app/scripts/dump_functions.py",
            funcs_path,
        ]
        try:
            output = subprocess.check_output(cmd)
        except Exception as e:
            raise gr.Error(f"Unable to run Ghidra on {file}: {e}") from e

        if not os.path.exists(funcs_path):
            raise gr.Error(f"DIRTY Ghidra failed to produce output: {output}")

        # Read the result before the temporary directory is removed, and
        # close the handle deterministically.
        with open(funcs_path) as funcs_file:
            return json.load(funcs_file)
with gr.Blocks() as demo:
    # Per-session value; the handlers below store the uploaded file and the
    # per-function data returned by Ghidra in it.
    state = gr.State()

    intro = gr.Markdown(
        """
# DIRTY-Ghidra Inference Demo
Welcome! This is a demo of DIRTY-Ghidra, a tool that predict names and types for variables for Ghidra's decompiler.
To get started, upload a binary or select one of the example binaries below.
## TODOs
* Avoid re-running Ghidra when changing the function
* Make predictions for variables in non-unique storage locations
"""
    )

    file_widget = gr.File(label="Executable file")

    # Hidden until a binary has been analyzed successfully.
    with gr.Column(visible=False) as col:
        gr.Markdown(
            """
Great, you selected an executable! Now pick the function you would like
to analyze.
"""
        )

        # The placeholder choice is replaced by file_change_fn.
        fun_dropdown = gr.Dropdown(
            label="Select a function", choices=["Woohoo!"], interactive=True
        )

        gr.Markdown(
            """
Below you can find some information.
"""
        )

        with gr.Row(visible=True) as result:
            disassembly = gr.Code(label="Disassembly", lines=20)
            original_decompile = gr.Code(
                language="c", label="Original Decompilation", lines=20
            )
            decompile = gr.Code(
                language="c",
                label="Renamed and retyped Decompilation",
                lines=20,
            )
            model_output = gr.JSON(label="Model Output")

    # One example per entry in the "examples" directory next to this file.
    example_widget = gr.Examples(
        examples=[
            f.path
            for f in os.scandir(
                os.path.join(os.path.dirname(__file__), "examples")
            )
        ],
        inputs=file_widget,
        outputs=[state, disassembly, original_decompile, decompile, model_output],
    )

    def file_change_fn(file, progress=gr.Progress()):
        """Analyze a newly selected binary and populate the function dropdown.

        Args:
            file: Value of the file widget, or ``None`` when it was cleared.
                Its ``name`` attribute is used as the path of the binary.
            progress: Progress tracker; supplied through the default value.

        Returns:
            A dict of component updates. With no file, the function column is
            hidden and the state is reset. Otherwise the column is shown, the
            dropdown lists every function (the first one selected), and the
            state holds the file plus a name -> ``cf`` mapping.

        Raises:
            gr.Error: If the analysis fails or yields no functions.
        """
        if file is None:
            return {col: gr.update(visible=False), state: {"file": None}}

        try:
            progress(
                0,
                desc=f"Analyzing binary {os.path.basename(file.name)} with Ghidra...",
            )
            fun_data = get_functions(file.name)
            # Dropdown entries are (label, value) pairs: "name (0xaddr)" is
            # shown, the integer address is the selected value.
            addrs = [
                (f"{name} ({hex(int(addr))})", int(addr))
                for addr, (name, cf) in fun_data.items()
            ]
            cfs = {name: cf for (name, cf) in fun_data.values()}
        except Exception as e:
            raise gr.Error(f"Unable to analyze binary with Ghidra: {e}") from e

        # Without this check an empty result surfaced as a bare IndexError
        # from addrs[0] below.
        if not addrs:
            raise gr.Error("Ghidra did not find any functions in the binary.")

        return {
            col: gr.Column(visible=True),
            fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
            state: {"file": file, "cfs": cfs},
        }

    def function_change_fn(selected_fun, state, progress=gr.Progress()):
        """Run DIRTY inference on the selected function and show the results.

        Args:
            selected_fun: Dropdown value; file_change_fn sets it to the
                function's integer address.
            state: Session state dict; its ``"file"`` entry is the uploaded
                binary.
            progress: Progress tracker; supplied through the default value.

        Returns:
            A dict of updates for the disassembly, both decompilation panes
            and the model-output pane.

        Raises:
            gr.Error: If Ghidra cannot be run, produces no result file, or
                the result file reports an exception.
        """
        panes = (disassembly, original_decompile, decompile, model_output)

        # Nothing to analyze (no selection or no file): clear the panes
        # instead of failing on hex(None) or a missing state entry.
        if selected_fun is None or not state or state.get("file") is None:
            return {pane: gr.update(value=None) for pane in panes}

        with tempfile.TemporaryDirectory() as tmp_dir:
            progress(0, desc=f"Running DIRTY Ghidra on {hex(selected_fun)}...")
            result_path = os.path.join(tmp_dir, "funcs.json")
            # Argument list rather than a shell string, so an upload path
            # with spaces or shell metacharacters is passed through verbatim.
            cmd = [
                "/ghidra/support/analyzeHeadless",
                tmp_dir,
                "Project",
                "-import",
                str(state["file"].name),
                "-postscript",
                "/DIRTY/scripts/DIRTY_infer.py",
                result_path,
                str(selected_fun),
            ]
            try:
                output = subprocess.check_output(cmd)
            except Exception as e:
                # `output` is not bound when check_output raises; use what
                # the exception itself captured (CalledProcessError.output).
                captured = getattr(e, "output", None)
                detail = f"\n{captured}" if captured else ""
                raise gr.Error(f"Unable to run Ghidra: {e}{detail}") from e

            if not os.path.exists(result_path):
                raise gr.Error(f"DIRTY Ghidra failed to produce output: {output}")

            with open(result_path) as result_file:
                json_info = json.load(result_file)

        if "exception" in json_info:
            raise gr.Error(f"DIRTY Ghidra failed: {json_info['exception']}")

        # Value-only updates, independent of the target component's type.
        return {
            disassembly: gr.update(value=json_info["disassembly"]),
            original_decompile: gr.update(value=json_info["original_decompile"]),
            decompile: gr.update(value=json_info["decompile"]),
            model_output: gr.update(value=json_info["model_output"]),
        }

    # Need to put intro as output to get progress to work!
    file_widget.change(
        file_change_fn, file_widget, outputs=[intro, state, col, fun_dropdown]
    )

    fun_dropdown.change(
        function_change_fn,
        inputs=[fun_dropdown, state],
        outputs=[disassembly, original_decompile, decompile, model_output],
    )
# Make file descriptor 2 (stderr) a duplicate of file descriptor 1 (stdout),
# so anything written to stderr goes wherever stdout goes.
# NOTE(review): the original remark here was "spaces only shows stderr..",
# which suggests the opposite direction may have been intended - confirm.
stdout_fd = sys.stdout.fileno()
stderr_fd = sys.stderr.fileno()
os.dup2(stdout_fd, stderr_fd)

# Enable the request queue, then serve on all interfaces, port 7860.
demo.queue()
demo.launch(server_name="0.0.0.0", server_port=7860)
|