ejschwartz committed on
Commit
1bc7f4c
·
1 Parent(s): b7507e3

Filter out trivial examples

Browse files
Files changed (2) hide show
  1. main.py +12 -9
  2. scripts/dump_functions.py +15 -1
main.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- import shutil
3
  import subprocess
4
  import tempfile
5
  import os
@@ -107,15 +106,17 @@ with gr.Blocks() as demo:
107
 
108
  addrs = [
109
  (f"{name} ({hex(int(addr))})", int(addr))
110
- for addr, name in fun_data.items()
111
  ]
 
 
112
  except Exception as e:
113
  raise gr.Error(f"Unable to analyze binary with Ghidra: {e}")
114
 
115
  return {
116
  col: gr.Column(visible=True),
117
  fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
118
- state: {"file": file},
119
  }
120
 
121
  def function_change_fn(selected_fun, state, progress=gr.Progress()):
@@ -131,18 +132,20 @@ with gr.Blocks() as demo:
131
  progress(0, desc=f"Running DIRTY Ghidra on {hex(selected_fun)}...")
132
 
133
  try:
134
- subprocess.run(
135
  f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {state['file'].name} -postscript /DIRTY/scripts/DIRTY_infer.py {TEMP_DIR}/funcs.json {selected_fun}",
136
  shell=True,
137
  )
 
 
138
 
139
- json_info = json.load(open(f"{TEMP_DIR}/funcs.json"))
 
140
 
141
- if "exception" in json_info:
142
- raise gr.Error(f"DIRTY Ghidra failed: {json_info['exception']}")
143
 
144
- except Exception as e:
145
- raise gr.Error(f"Unable to run DIRTY Ghidra: {e}")
146
 
147
  #print(json_info)
148
 
 
1
  import gradio as gr
 
2
  import subprocess
3
  import tempfile
4
  import os
 
106
 
107
  addrs = [
108
  (f"{name} ({hex(int(addr))})", int(addr))
109
+ for addr, (name, cf) in fun_data.items()
110
  ]
111
+
112
+ cfs = {name: cf for (name, cf) in fun_data.values()}
113
  except Exception as e:
114
  raise gr.Error(f"Unable to analyze binary with Ghidra: {e}")
115
 
116
  return {
117
  col: gr.Column(visible=True),
118
  fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
119
+ state: {"file": file, "cfs": cfs},
120
  }
121
 
122
  def function_change_fn(selected_fun, state, progress=gr.Progress()):
 
132
  progress(0, desc=f"Running DIRTY Ghidra on {hex(selected_fun)}...")
133
 
134
  try:
135
+ output = subprocess.check_output(
136
  f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {state['file'].name} -postscript /DIRTY/scripts/DIRTY_infer.py {TEMP_DIR}/funcs.json {selected_fun}",
137
  shell=True,
138
  )
139
+ except Exception as e:
140
+ raise gr.Error(f"Unable to run Ghidra: {e}\n{output}")
141
 
142
+ if not os.path.exists(f"{TEMP_DIR}/funcs.json"):
143
+ raise gr.Error(f"DIRTY Ghidra failed to produce output: {output}")
144
 
145
+ json_info = json.load(open(f"{TEMP_DIR}/funcs.json"))
 
146
 
147
+ if "exception" in json_info:
148
+ raise gr.Error(f"DIRTY Ghidra failed: {json_info['exception']}")
149
 
150
  #print(json_info)
151
 
scripts/dump_functions.py CHANGED
@@ -1,4 +1,11 @@
1
  import json
 
 
 
 
 
 
 
2
 
3
  def dump_functions_to_json():
4
  functionManager = currentProgram().getFunctionManager()
@@ -15,8 +22,15 @@ def dump_functions_to_json():
15
  func_name = func.getName()
16
  func_address = func.getEntryPoint().getOffset()
17
 
 
 
 
 
 
 
18
  # Add function name and address to the dictionary
19
- functions_dict[func_address] = func_name
 
20
 
21
  # Convert the dictionary to a JSON object
22
  json_data = json.dumps(functions_dict, indent=4)
 
1
  import json
2
+ import sys
3
+
4
+ sys.path.append("/DIRTY/dirty")
5
+
6
+ import utils.infer
7
+ from utils.dataset import Example
8
+
9
 
10
  def dump_functions_to_json():
11
  functionManager = currentProgram().getFunctionManager()
 
22
  func_name = func.getName()
23
  func_address = func.getEntryPoint().getOffset()
24
 
25
+ cf = utils.infer.ghidra_obtain_cf(func)
26
+
27
+ example = Example.from_cf(
28
+ cf, binary_file="binary_file", max_stack_length=1024, max_type_size=1024
29
+ )
30
+
31
  # Add function name and address to the dictionary
32
+ if example.is_valid_example:
33
+ functions_dict[func_address] = (func_name, cf.to_json())
34
 
35
  # Convert the dictionary to a JSON object
36
  json_data = json.dumps(functions_dict, indent=4)