Update main.py
main.py
CHANGED
@@ -5,6 +5,7 @@ import prompt_style
 import os
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+import time


 model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3-GGUF"
@@ -40,14 +41,17 @@ def format_prompt(item: Item):
 def generate(item: Item):
     formatted_prompt = format_prompt(item)
     output = model.create_chat_completion(messages=formatted_prompt, seed=item.seed,
-                                          temperature=item.temperature,
+                                          temperature=item.temperature, max_tokens=item.max_new_tokens)

     out = output['choices'][0]['message']['content']
     return out

 @app.post("/generate/")
 async def generate_text(item: Item):
+    t1 = time.time()
     ans = generate(item)
+    print(ans)
+    print(f"time: {str(time.time() - t1)}")
     return {"response": ans}


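For context, the hunks above reference names defined elsewhere in main.py: Item, format_prompt, model, and app. Below is a minimal sketch of how that scaffolding plausibly fits together, assuming a FastAPI app with a pydantic Item model. The GGUF filename, the prompt field, the default values, and the body of format_prompt are guesses for illustration; only the names themselves appear in the diff.

    import os
    import time

    from fastapi import FastAPI
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama
    from pydantic import BaseModel

    # main.py also does `import prompt_style` (visible in the hunk context);
    # it is omitted here because its contents are not shown in the diff.

    model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3-GGUF"

    # Download one quantized GGUF file from the repo and load it.
    # The filename is a hypothetical choice; the diff only shows model_id.
    model_path = hf_hub_download(
        repo_id=model_id,
        filename="meta-llama-3-8b-instruct-abliterated-v3.Q4_K_M.gguf",
    )
    model = Llama(model_path=model_path)

    app = FastAPI()

    class Item(BaseModel):
        # seed, temperature, and max_new_tokens are referenced by the diff;
        # the prompt field and all defaults are assumptions.
        prompt: str
        seed: int = 42
        temperature: float = 0.7
        max_new_tokens: int = 256

    def format_prompt(item: Item):
        # Assumed to return an OpenAI-style message list, since generate()
        # passes its result to create_chat_completion(messages=...).
        return [{"role": "user", "content": item.prompt}]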
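With the timing change in place, a request like the following exercises the endpoint (host, port, and field values are hypothetical, and the Item schema above is itself an assumption); the server log then shows the generated text plus the wall-clock time spent in generate():

    curl -X POST "http://localhost:8000/generate/" \
         -H "Content-Type: application/json" \
         -d '{"prompt": "Hello!", "seed": 42, "temperature": 0.7, "max_new_tokens": 128}'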