Improve the Python-based benchmarking script
Browse files: bench-TriLMs.py (+11 -8)
bench-TriLMs.py
CHANGED
@@ -44,12 +44,12 @@ def build_llama_cpp(options: Sequence[str]):
|
|
44 |
os.chdir(LLAMA_CPP_PATH)
|
45 |
builddir = LLAMA_CPP_PATH / "build"
|
46 |
if builddir.exists():
|
47 |
-
|
48 |
-
os.system("rm -
|
49 |
builddir.mkdir()
|
50 |
os.chdir(builddir)
|
51 |
os.system(shlex.join(("cmake", "..", *options)))
|
52 |
-
os.system("make -j llama-bench llama-quantize test-backend-ops")
|
53 |
|
54 |
|
55 |
def quantize(types: Sequence[str] = ALL_TYPES, sizes: Sequence[str] = MODEL_SIZES):
|
@@ -103,11 +103,10 @@ def llama_bench(
|
|
103 |
"-o",
|
104 |
"json",
|
105 |
]
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
)
|
110 |
-
logger.debug(result.stderr)
|
111 |
|
112 |
new_output = json.loads(result.stdout)
|
113 |
logger.info(json.dumps(new_output, indent=4))
|
@@ -116,6 +115,7 @@ def llama_bench(
|
|
116 |
|
117 |
|
118 |
def test_backend_perf() -> str:
|
|
|
119 |
result = subprocess.run(
|
120 |
[
|
121 |
str(LLAMA_CPP_PATH / "build" / "bin" / "test-backend-ops"),
|
@@ -125,6 +125,7 @@ def test_backend_perf() -> str:
|
|
125 |
],
|
126 |
capture_output=True,
|
127 |
)
|
|
|
128 |
return result.stdout.decode(encoding="utf-8")
|
129 |
|
130 |
|
@@ -165,6 +166,8 @@ def parse_args(args: Sequence[str]):
|
|
165 |
if __name__ == "__main__":
|
166 |
args = parse_args(sys.argv)
|
167 |
|
|
|
|
|
168 |
LLAMA_CPP_PATH = args.llama_cpp_path
|
169 |
MODEL_DIR = args.model_dir
|
170 |
|
|
|
44 |
os.chdir(LLAMA_CPP_PATH)
|
45 |
builddir = LLAMA_CPP_PATH / "build"
|
46 |
if builddir.exists():
|
47 |
+
logger.info("Removing %s", builddir)
|
48 |
+
os.system("rm -rf build")
|
49 |
builddir.mkdir()
|
50 |
os.chdir(builddir)
|
51 |
os.system(shlex.join(("cmake", "..", *options)))
|
52 |
+
os.system(f"make -j{os.cpu_count()} llama-bench llama-quantize test-backend-ops")
|
53 |
|
54 |
|
55 |
def quantize(types: Sequence[str] = ALL_TYPES, sizes: Sequence[str] = MODEL_SIZES):
|
|
|
103 |
"-o",
|
104 |
"json",
|
105 |
]
|
106 |
+
command = [str(LLAMA_CPP_PATH / "build" / "bin" / "llama-bench")] + args
|
107 |
+
logger.info("Running: %s", " ".join(command))
|
108 |
+
result = subprocess.run(command, capture_output=True)
|
109 |
+
logger.debug(result.stderr.decode())
|
|
|
110 |
|
111 |
new_output = json.loads(result.stdout)
|
112 |
logger.info(json.dumps(new_output, indent=4))
|
|
|
115 |
|
116 |
|
117 |
def test_backend_perf() -> str:
|
118 |
+
logger.info("Test MUL_MAT performance")
|
119 |
result = subprocess.run(
|
120 |
[
|
121 |
str(LLAMA_CPP_PATH / "build" / "bin" / "test-backend-ops"),
|
|
|
125 |
],
|
126 |
capture_output=True,
|
127 |
)
|
128 |
+
logger.debug(result.stdout.decode())
|
129 |
return result.stdout.decode(encoding="utf-8")
|
130 |
|
131 |
|
|
|
166 |
if __name__ == "__main__":
|
167 |
args = parse_args(sys.argv)
|
168 |
|
169 |
+
logging.basicConfig(level=logging.DEBUG)
|
170 |
+
|
171 |
LLAMA_CPP_PATH = args.llama_cpp_path
|
172 |
MODEL_DIR = args.model_dir
|
173 |
|