Improve FIM example
README.md CHANGED
````diff
@@ -185,13 +185,29 @@ from llama_cpp import Llama
 
 # Completion API
 
+prompt = "def add("
+suffix = "\n    return sum\n\n"
+
 llm = Llama(model_path="./Codestral-22B-v0.1.IQ4_XS.gguf", n_gpu_layers=57, n_ctx=32768, spm_infill=True)
 output = llm.create_completion(
   temperature = 0.0,
   repeat_penalty = 1.0,
-  prompt = "def add(",
-  suffix = "\n    return sum\n\n"
+  prompt = prompt,
+  suffix = suffix
 )
+
+# Models sometimes repeat suffix in response, attempt to filter that
+response = output["choices"][0]["text"]
+response_stripped = response.rstrip()
+unwanted_response_suffix = suffix.rstrip()
+unwanted_response_length = len(unwanted_response_suffix)
+
+filtered = False
+if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
+    response = response_stripped[:-unwanted_response_length]
+    filtered = True
+
+print(f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{prompt}\033[32m{response}\033[0m{suffix}")
 ```
 
 #### Simple llama-cpp-python example function calling code
````
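
For reference, the suffix-echo filter this commit appends can be exercised without downloading the Codestral weights. The sketch below is a minimal stand-in: the hard-coded `output` dict (and its mocked completion text) is a hypothetical substitute for a real `llm.create_completion(...)` result, not part of the commit; the filtering steps themselves are the same ones added above.

```python
# Standalone sketch of the suffix-echo filter added in this commit.
# The `output` dict is a hypothetical stand-in for a real
# llm.create_completion(...) result, so no model is needed to run it.

prompt = "def add("
suffix = "\n    return sum\n\n"

# Pretend the model infilled the middle and then echoed the suffix.
output = {"choices": [{"text": "a, b):\n    sum = a + b\n    return sum\n\n"}]}

response = output["choices"][0]["text"]
response_stripped = response.rstrip()
unwanted_response_suffix = suffix.rstrip()
unwanted_response_length = len(unwanted_response_suffix)

# If the stripped response ends with the stripped suffix, cut off the echo.
filtered = False
if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
    response = response_stripped[:-unwanted_response_length]
    filtered = True

# \033[32m ... \033[0m renders the infilled middle in green on ANSI terminals.
print(f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{prompt}\033[32m{response}\033[0m{suffix}")
```

The comparison runs on `rstrip()`-ed strings, presumably so that trailing-whitespace differences do not defeat the exact match. With the mocked text above, the echoed `return sum` is trimmed, `filtered` flips to `True`, and the reassembled `def add(a, b)` prints with the infilled middle highlighted.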
|