Spaces:
Running
Running
File size: 7,706 Bytes
04e6f1a d717efb 9887ae8 80dd465 5ed5cfe 2696a55 8464e89 04e6f1a 2696a55 f154634 8464e89 d717efb 2696a55 8464e89 d717efb f99e17c 2696a55 f99e17c f154634 2696a55 e92c772 2696a55 f99e17c e92c772 2696a55 bbc4bc2 8464e89 2696a55 d717efb f154634 d717efb f154634 d717efb 5fcff16 d717efb f154634 0ff1b21 b8dc8f6 d717efb a41bb55 d717efb 8464e89 f154634 8464e89 f154634 2fba160 f154634 8464e89 f154634 d717efb f154634 8464e89 53bcd0f 2696a55 f154634 53bcd0f 2696a55 f154634 53bcd0f f154634 2696a55 f154634 5f7c7b0 2696a55 f154634 8464e89 53bcd0f 5fcff16 d717efb f154634 d717efb 8464e89 5fcff16 f154634 8464e89 f154634 d717efb f154634 ec41d0e f154634 1bc7f4c ec41d0e f154634 d717efb f154634 2696a55 f154634 2696a55 f154634 2696a55 f154634 2696a55 f154634 1bc7f4c 2696a55 f154634 9887ae8 f154634 1bc7f4c f154634 9887ae8 036f81f 9887ae8 036f81f 9887ae8 f154634 9887ae8 f154634 5fcff16 f154634 d717efb 80dd465 d717efb 6de58c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
import gradio as gr
import subprocess
import tempfile
import itertools
import os
import sys
import hashlib
import json
# Directory that holds the Ghidra projects created by this app (one project
# per analyzed binary, named after the binary's SHA-256 digest).
# expanduser("~") is used instead of os.getenv("HOME") so that an unset HOME
# does not silently yield the relative path "None/ghidra_project".
GHIDRA_PROJECT_DIR = os.path.join(os.path.expanduser("~"), "ghidra_project")
os.makedirs(GHIDRA_PROJECT_DIR, exist_ok=True)
def hash_file(file):
    """Return the hexadecimal SHA-256 digest of the file at path ``file``.

    The file is opened in binary mode and consumed in 4096-byte chunks, so
    it is never loaded into memory as a whole.
    """
    digest = hashlib.sha256()
    with open(file, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                # An empty read marks end of file.
                break
            digest.update(chunk)
    return digest.hexdigest()
def get_functions(file):
    """Import ``file`` into a Ghidra project and return its dumped functions.

    The binary is imported with Ghidra's ``analyzeHeadless`` into a project
    named after the file's SHA-256 digest, then the ``dump_functions.py``
    post-script is run and asked to write ``funcs.json`` into a temporary
    directory.

    Args:
        file: Path of the binary to analyze.

    Returns:
        The parsed contents of the ``funcs.json`` file produced by the
        post-script.

    Raises:
        gr.Error: If the import fails for a reason other than the program
            already being present in the project, or if the post-script
            does not produce ``funcs.json``.
    """
    analyze_headless = "/ghidra/support/analyzeHeadless"
    file_hash = hash_file(file)

    with tempfile.TemporaryDirectory() as tmp_dir:
        funcs_path = os.path.join(tmp_dir, "funcs.json")

        # First import the file.  Arguments are passed as a list (no shell),
        # so paths containing spaces or shell metacharacters are handed to
        # Ghidra verbatim.  stderr is merged into stdout so error messages
        # raised below carry the full tool output.
        o = subprocess.run(
            [analyze_headless, GHIDRA_PROJECT_DIR, file_hash, "-import", file],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            encoding="utf8",
        )
        # A failure that mentions a conflicting program file is tolerated --
        # presumably the binary was imported by an earlier request (confirm
        # against Ghidra's behavior).
        if (
            o.returncode != 0
            and "Found conflicting program file in project:" not in o.stdout
        ):
            raise gr.Error(f"Unable to run Ghidra on {file}: {o.stdout}")

        # Then run the dump script over the project.  Its exit status is not
        # checked; success is judged by whether the output file exists.
        o = subprocess.run(
            [
                analyze_headless,
                GHIDRA_PROJECT_DIR,
                file_hash,
                "-process",
                "-postscript",
                "/home/user/app/scripts/dump_functions.py",
                funcs_path,
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            encoding="utf8",
        )
        if not os.path.exists(funcs_path):
            raise gr.Error(f"DIRTY Ghidra failed to produce output: {o.stdout}")

        # Read the result before the temporary directory is removed, and
        # close the handle deterministically.
        with open(funcs_path) as funcs_file:
            return json.load(funcs_file)
# Gradio UI: upload a binary, pick one of its functions, and show the
# DIRTY-Ghidra results for that function.
# NOTE(review): the source this block was recovered from had lost its
# indentation; the nesting below (result Row inside the hidden Column,
# Examples outside it) is reconstructed -- confirm against the deployed app.
with gr.Blocks() as demo:
    # Per-session dictionary written by file_change_fn and read by
    # function_change_fn ("file", "file_hash", "cfs").
    state = gr.State()

    intro = gr.Markdown(
        """
# DIRTY-Ghidra Inference Demo
Welcome! This is a demo of DIRTY-Ghidra, a tool that predict names and types for variables for Ghidra's decompiler.
To get started, upload a binary or select one of the example binaries below. Uploading a binary requires decompiling each function in the binary, which can take a few minutes.
## TODOs
* Make predictions for variables in non-unique storage locations
"""
    )

    file_widget = gr.File(label="Executable file")

    # Hidden until a binary has been analyzed successfully.
    with gr.Column(visible=False) as col:
        gr.Markdown(
            """
Great, you selected an executable! Now pick the function you would like
to analyze.
"""
        )

        fun_dropdown = gr.Dropdown(
            label="Select a function", choices=["Woohoo!"], interactive=True
        )

        gr.Markdown(
            """
Below you can find some information.
"""
        )

        with gr.Row(visible=True) as result:
            disassembly = gr.Code(
                label="Disassembly",
                lines=20,
            )
            original_decompile = gr.Code(
                language="c",
                label="Original Decompilation",
                lines=20,
            )
            decompile = gr.Code(
                language="c",
                label="Renamed and retyped Decompilation",
                lines=20,
            )
            model_output = gr.JSON(
                label="Model Output",
            )

    # One example per entry of the "examples" directory next to this file.
    example_widget = gr.Examples(
        examples=[
            f.path
            for f in os.scandir(
                os.path.join(os.path.dirname(__file__), "examples")
            )
        ],
        inputs=file_widget,
        outputs=[state, disassembly, original_decompile, decompile, model_output],
    )

    def file_change_fn(file, progress=gr.Progress()):
        """Analyze a newly selected binary and populate the function picker.

        Args:
            file: Value of ``file_widget``; ``None`` when the selection was
                cleared, otherwise an object whose ``name`` is the path of
                the uploaded file.
            progress: Gradio progress tracker.  It is declared as a default
                argument (like in ``function_change_fn``) because that is
                how Gradio attaches a tracker to an event handler.

        Returns:
            A dict of component updates: hides the column and resets the
            state when ``file`` is ``None``; otherwise shows the column,
            fills the dropdown and stores file, hash and "cfs" in the state.

        Raises:
            gr.Error: If Ghidra analysis fails or reports no functions.
        """
        if file is None:
            return {col: gr.update(visible=False), state: {"file": None}}

        try:
            progress(
                0,
                desc=f"Analyzing binary {os.path.basename(file.name)} with Ghidra...",
            )
            fun_data = get_functions(file.name)
            # fun_data maps an address to a (name, cf, numvars) triple.
            # Dropdown choices are (label, value) pairs with the integer
            # address as the value.
            addrs = [
                (f"{name} ({hex(int(addr))}; {numvars} vars)", int(addr))
                for addr, (name, cf, numvars) in fun_data.items()
            ]
            cfs = {name: cf for (name, cf, _numvars) in fun_data.values()}
        except Exception as e:
            raise gr.Error(f"Unable to analyze binary with Ghidra: {e}") from e

        if not addrs:
            # Without this guard the default selection below would fail with
            # an opaque IndexError.
            raise gr.Error(
                "Unable to analyze binary with Ghidra: no functions were found"
            )

        return {
            col: gr.Column(visible=True),
            fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
            state: {
                "file": file,
                "file_hash": hash_file(file.name),
                "cfs": cfs,
            },
        }

    def function_change_fn(selected_fun, state, progress=gr.Progress()):
        """Run the DIRTY inference script on one function and show the result.

        Args:
            selected_fun: Integer address chosen in ``fun_dropdown``.
            state: Session dict produced by ``file_change_fn``; its
                "file_hash" entry names the Ghidra project to process.
            progress: Gradio progress tracker.

        Returns:
            A dict of updates for the disassembly, both decompilation panes
            and the model-output JSON pane.

        Raises:
            gr.Error: If nothing is selected, Ghidra exits with an error,
                no output file is produced, the output cannot be parsed, or
                the script reported an exception.
        """
        if selected_fun is None or not state or not state.get("file_hash"):
            # Fail with a readable message instead of a TypeError/KeyError.
            raise gr.Error(
                "Unable to run Ghidra: no binary or function is selected"
            )

        with tempfile.TemporaryDirectory() as tmp_dir:
            out_path = os.path.join(tmp_dir, "funcs.json")

            progress(0, desc=f"Running DIRTY Ghidra on {hex(selected_fun)}...")

            # Argument list instead of a shell string; stderr is merged into
            # stdout so the messages below include the full tool output.
            o = subprocess.run(
                [
                    "/ghidra/support/analyzeHeadless",
                    GHIDRA_PROJECT_DIR,
                    state["file_hash"],
                    "-process",
                    "-postscript",
                    "/DIRTY/scripts/DIRTY_infer.py",
                    out_path,
                    str(selected_fun),
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                encoding="utf8",
            )
            if o.returncode != 0:
                raise gr.Error(f"Unable to run Ghidra: {o.stdout}")

            if not os.path.exists(out_path):
                raise gr.Error(f"DIRTY Ghidra failed to produce output: {o.stdout}")

            try:
                with open(out_path) as out_file:
                    json_info = json.load(out_file)
            except Exception as e:
                raise gr.Error(
                    f"Unable to parse DIRTY Ghidra output: {e}\n{o.stdout}"
                ) from e

        if "exception" in json_info:
            raise gr.Error(f"DIRTY Ghidra failed: {json_info['exception']}")

        # Group the keys of source_filtered by their value (the "location"
        # in the original author's words).  groupby only merges adjacent
        # items, hence the sort with the same key first.
        by_location = lambda item: item[1]
        located = sorted(
            json_info["other_info"]["example_info"]["source_filtered"].items(),
            key=by_location,
        )
        dup_location_vars = {
            location: [var for var, _location in group]
            for location, group in itertools.groupby(located, by_location)
        }

        model_output_info = {
            "model_output": json_info["model_output"],
            "model_output_multi": json_info["model_output_multi"],
            "dup_location_vars": dup_location_vars,
            "other_outputs": json_info["other_info"]["other_outputs"],
        }

        return {
            disassembly: gr.Textbox(value=json_info["disassembly"]),
            original_decompile: gr.Textbox(value=json_info["original_decompile"]),
            decompile: gr.Textbox(value=json_info["decompile"]),
            model_output: gr.JSON(value=json.dumps(model_output_info)),
        }

    # Need to put intro as output to get progress to work!
    file_widget.change(
        file_change_fn, file_widget, outputs=[intro, state, col, fun_dropdown]
    )

    fun_dropdown.change(
        function_change_fn,
        inputs=[fun_dropdown, state],
        outputs=[disassembly, original_decompile, decompile, model_output],
    )
# Make file descriptor 2 (stderr) a duplicate of stdout's descriptor, so
# everything written to stderr from here on ends up wherever stdout goes.
# NOTE(review): the original comment read "spaces only shows stderr..", but
# this call redirects stderr INTO stdout -- confirm which stream the Spaces
# log viewer actually displays.
stdout_fd = sys.stdout.fileno()
stderr_fd = sys.stderr.fileno()
os.dup2(stdout_fd, stderr_fd)

# Enable the request queue, then serve on all interfaces on port 7860.
demo.queue()
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)
|