ejschwartz committed on
Commit
2696a55
·
1 Parent(s): 2fba160

avoid re-importing

Browse files
Files changed (1) hide show
  1. main.py +38 -18
main.py CHANGED
@@ -3,27 +3,41 @@ import subprocess
3
  import tempfile
4
  import os
5
  import sys
 
6
  import json
7
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def get_functions(file):
10
 
 
 
11
  with tempfile.TemporaryDirectory() as TEMP_DIR:
12
 
13
- try:
14
- output = subprocess.check_output(
15
- f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {file} -postscript /home/user/app/scripts/dump_functions.py {TEMP_DIR}/funcs.json",
16
  shell=True,
 
 
17
  )
18
- except Exception as e:
19
- raise gr.Error(f"Unable to run Ghidra on {file}: {e}")
20
 
21
  if not os.path.exists(f"{TEMP_DIR}/funcs.json"):
22
- raise gr.Error(f"DIRTY Ghidra failed to produce output: {output}")
23
 
24
  json_funcs = json.load(open(f"{TEMP_DIR}/funcs.json"))
25
 
26
- return json_funcs
27
 
28
 
29
  with gr.Blocks() as demo:
@@ -38,7 +52,6 @@ with gr.Blocks() as demo:
38
  To get started, upload a binary or select one of the example binaries below.
39
 
40
  ## TODOs
41
- * Avoid re-running Ghidra when changing the function
42
  * Make predictions for variables in non-unique storage locations
43
  """
44
  )
@@ -67,19 +80,23 @@ with gr.Blocks() as demo:
67
 
68
  with gr.Row(visible=True) as result:
69
  disassembly = gr.Code(
70
- label="Disassembly", lines=20
 
71
  )
72
  original_decompile = gr.Code(
73
  language="c",
74
- label="Original Decompilation", lines=20
 
75
  )
76
  decompile = gr.Code(
77
  language="c",
78
  label="Renamed and retyped Decompilation",
79
  lines=20,
 
80
  )
81
  model_output = gr.JSON(
82
- label="Model Output"
 
83
  )
84
  # with gr.Column():
85
  # clazz = gr.Label()
@@ -119,7 +136,9 @@ with gr.Blocks() as demo:
119
  return {
120
  col: gr.Column(visible=True),
121
  fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
122
- state: {"file": file, "cfs": cfs},
 
 
123
  }
124
 
125
  def function_change_fn(selected_fun, state, progress=gr.Progress()):
@@ -132,16 +151,17 @@ with gr.Blocks() as demo:
132
 
133
  progress(0, desc=f"Running DIRTY Ghidra on {hex(selected_fun)}...")
134
 
135
- try:
136
- output = subprocess.check_output(
137
- f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {state['file'].name} -postscript /DIRTY/scripts/DIRTY_infer.py {TEMP_DIR}/funcs.json {selected_fun}",
138
  shell=True,
 
 
139
  )
140
- except Exception as e:
141
- raise gr.Error(f"Unable to run Ghidra: {e}\n{output}")
142
 
143
  if not os.path.exists(f"{TEMP_DIR}/funcs.json"):
144
- raise gr.Error(f"DIRTY Ghidra failed to produce output: {output}")
145
 
146
  json_info = json.load(open(f"{TEMP_DIR}/funcs.json"))
147
 
 
3
  import tempfile
4
  import os
5
  import sys
6
+ import hashlib
7
  import json
8
 
9
# Directory passed to analyzeHeadless as the Ghidra project location, so
# projects created by one call can be reused by later calls.
# expanduser("~") is used instead of os.getenv("HOME") because getenv returns
# None when HOME is unset, which would silently produce "None/ghidra_project".
GHIDRA_PROJECT_DIR = os.path.join(os.path.expanduser("~"), "ghidra_project")

# Create the directory up front; exist_ok avoids failing on restarts.
os.makedirs(GHIDRA_PROJECT_DIR, exist_ok=True)
12
+
13
def hash_file(file, chunk_size=4096):
    """Return the SHA-256 hex digest of the contents of *file*.

    The file is read in binary mode in fixed-size chunks, so the whole file
    is never held in memory at once.

    Args:
        file: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration. Must be positive;
            the value does not affect the resulting digest.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would end the loop at once and
    # yield the digest of an empty input; reject such sizes explicitly.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    digest = hashlib.sha256()
    with open(file, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for block in iter(lambda: f.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()
19
 
20
  def get_functions(file):
21
 
22
+ file_hash = hash_file(file)
23
+
24
  with tempfile.TemporaryDirectory() as TEMP_DIR:
25
 
26
+ o = subprocess.run(
27
+ f"/ghidra/support/analyzeHeadless {GHIDRA_PROJECT_DIR} {file_hash} -import {file} -postscript /home/user/app/scripts/dump_functions.py {TEMP_DIR}/funcs.json 2>&1",
 
28
  shell=True,
29
+ capture_output=True,
30
+ encoding="utf8"
31
  )
32
+ if o.returncode != 0:
33
+ raise gr.Error(f"Unable to run Ghidra on {file}: {o.stdout}")
34
 
35
  if not os.path.exists(f"{TEMP_DIR}/funcs.json"):
36
+ raise gr.Error(f"DIRTY Ghidra failed to produce output: {o.stdout}")
37
 
38
  json_funcs = json.load(open(f"{TEMP_DIR}/funcs.json"))
39
 
40
+ return json_funcs
41
 
42
 
43
  with gr.Blocks() as demo:
 
52
  To get started, upload a binary or select one of the example binaries below.
53
 
54
  ## TODOs
 
55
  * Make predictions for variables in non-unique storage locations
56
  """
57
  )
 
80
 
81
  with gr.Row(visible=True) as result:
82
  disassembly = gr.Code(
83
+ label="Disassembly", lines=20,
84
+ #min_width=400
85
  )
86
  original_decompile = gr.Code(
87
  language="c",
88
+ label="Original Decompilation", lines=20,
89
+ #min_width=400
90
  )
91
  decompile = gr.Code(
92
  language="c",
93
  label="Renamed and retyped Decompilation",
94
  lines=20,
95
+ #min_width=400
96
  )
97
  model_output = gr.JSON(
98
+ label="Model Output",
99
+ #min_width=400
100
  )
101
  # with gr.Column():
102
  # clazz = gr.Label()
 
136
  return {
137
  col: gr.Column(visible=True),
138
  fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
139
+ state: {"file": file,
140
+ "file_hash": hash_file(file.name),
141
+ "cfs": cfs},
142
  }
143
 
144
  def function_change_fn(selected_fun, state, progress=gr.Progress()):
 
151
 
152
  progress(0, desc=f"Running DIRTY Ghidra on {hex(selected_fun)}...")
153
 
154
+ o = subprocess.run(
155
+ f"/ghidra/support/analyzeHeadless {GHIDRA_PROJECT_DIR} {state['file_hash']} -process -postscript /DIRTY/scripts/DIRTY_infer.py {TEMP_DIR}/funcs.json {selected_fun} 2>&1",
 
156
  shell=True,
157
+ capture_output=True,
158
+ encoding="utf8"
159
  )
160
+ if o.returncode != 0:
161
+ raise gr.Error(f"Unable to run Ghidra: {o.stdout}")
162
 
163
  if not os.path.exists(f"{TEMP_DIR}/funcs.json"):
164
+ raise gr.Error(f"DIRTY Ghidra failed to produce output: {o.stdout}")
165
 
166
  json_info = json.load(open(f"{TEMP_DIR}/funcs.json"))
167