Improve FIM example
README.md CHANGED
````diff
@@ -185,13 +185,29 @@ from llama_cpp import Llama
 
 # Completion API
 
+prompt = "def add("
+suffix = "\n    return sum\n\n"
+
 llm = Llama(model_path="./Codestral-22B-v0.1.IQ4_XS.gguf", n_gpu_layers=57, n_ctx=32768, spm_infill=True)
 output = llm.create_completion(
   temperature = 0.0,
   repeat_penalty = 1.0,
-  prompt = "def add(",
-  suffix = "\n    return sum\n\n"
+  prompt = prompt,
+  suffix = suffix
 )
+
+# Models sometimes repeat suffix in response, attempt to filter that
+response = output["choices"][0]["text"]
+response_stripped = response.rstrip()
+unwanted_response_suffix = suffix.rstrip()
+unwanted_response_length = len(unwanted_response_suffix)
+
+filtered = False
+if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
+    response = response_stripped[:-unwanted_response_length]
+    filtered = True
+
+print(f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{prompt}\033[32m{response}\033[0m{suffix}")
 ```
 
 #### Simple llama-cpp-python example function calling code
````
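
For reference, the suffix-echo filter this commit appends can be exercised without downloading the Codestral weights. The sketch below is a minimal stand-in: the hard-coded `output` dict (and its mocked completion text) is a hypothetical substitute for a real `llm.create_completion(...)` result, not part of the commit; the filtering steps themselves are the same ones added above.

```python
# Standalone sketch of the suffix-echo filter added in this commit.
# The `output` dict is a hypothetical stand-in for a real
# llm.create_completion(...) result, so no model is needed to run it.

prompt = "def add("
suffix = "\n    return sum\n\n"

# Pretend the model infilled the middle and then echoed the suffix.
output = {"choices": [{"text": "a, b):\n    sum = a + b\n    return sum\n\n"}]}

response = output["choices"][0]["text"]
response_stripped = response.rstrip()
unwanted_response_suffix = suffix.rstrip()
unwanted_response_length = len(unwanted_response_suffix)

# If the stripped response ends with the stripped suffix, cut off the echo.
filtered = False
if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
    response = response_stripped[:-unwanted_response_length]
    filtered = True

# \033[32m ... \033[0m renders the infilled middle in green on ANSI terminals.
print(f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{prompt}\033[32m{response}\033[0m{suffix}")
```

The comparison runs on `rstrip()`-ed strings, presumably so that trailing-whitespace differences do not defeat the exact match. With the mocked text above, the echoed `return sum` is trimmed, `filtered` flips to `True`, and the reassembled `def add(a, b)` prints with the infilled middle highlighted.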
|