File size: 1,611 Bytes
bee5263
 
f79168b
bee5263
3fbd422
a58b418
bab92d5
3fbd422
bee5263
bab92d5
f0a5811
 
bee5263
 
 
 
 
dbcfd8e
 
 
29fbbe7
dbcfd8e
 
 
bee5263
3fbd422
 
 
 
d465d44
f79168b
 
 
3fbd422
bee5263
 
3fbd422
f79168b
 
 
 
3fbd422
f79168b
 
bee5263
 
 
fff1df0
f6819c7
 
 
 
 
bab92d5
f0a5811
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
import uvicorn
import prompt_style
import os
from huggingface_hub import hf_hub_download


# Model bootstrap, run once at import time: fetch the quantized GGUF weights
# from the Hub, then load them with every layer offloaded to the GPU
# (n_gpu_layers=-1) and a 4096-token context window.
model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3-GGUF"
model_path = hf_hub_download(
    repo_id=model_id,
    filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf",
    # .get() tolerates a missing HF_TOKEN (token=None → anonymous download);
    # the original os.environ['HF_TOKEN'] raised KeyError when unset.
    # NOTE(review): presumably the repo is publicly readable — confirm.
    token=os.environ.get("HF_TOKEN"),
)
model = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=4096, verbose=False)

class Item(BaseModel):
    """Request body accepted by the POST /generate/ endpoint."""

    prompt: str                       # the latest user message
    history: list                     # assumed: prior (user, assistant) turn pairs — confirm with callers
    system_prompt: str                # system message for the conversation
    temperature: float = 0.6          # sampling temperature
    max_new_tokens: int = 1024        # cap on tokens generated per request
    top_p: float = 0.95               # nucleus-sampling threshold
    repetition_penalty: float = 1.0   # 1.0 = no penalty
    seed: int = 42                    # fixed seed for reproducible sampling (PEP 8: no space before ':')
    
# Single FastAPI application instance; the route handlers below register on it.
app = FastAPI()

def format_prompt(item: "Item") -> list:
    """Build the chat-completion message list for *item*.

    Returns messages in the order: system, alternating user/assistant pairs
    from item.history, then item.prompt as the final user turn.

    Fix: honor the client-supplied ``system_prompt`` when it is non-empty,
    falling back to the server default ``prompt_style.data``. The original
    always used the default, silently ignoring the field Item exposes.
    """
    system = item.system_prompt or prompt_style.data
    messages = [{"role": "system", "content": system}]
    for user_msg, assistant_msg in item.history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": item.prompt})
    return messages

def generate(item: "Item") -> str:
    """Run a chat completion for *item* and return the assistant's reply text.

    Fix: forward ``top_p`` and ``repetition_penalty`` — Item declares both,
    but the original call dropped them, so clients could never influence
    nucleus sampling or the repetition penalty.
    """
    messages = format_prompt(item)
    output = model.create_chat_completion(
        messages=messages,
        seed=item.seed,
        temperature=item.temperature,
        max_tokens=item.max_new_tokens,
        top_p=item.top_p,
        # llama-cpp-python names this parameter repeat_penalty
        repeat_penalty=item.repetition_penalty,
    )
    return output["choices"][0]["message"]["content"]

@app.post("/generate/")
async def generate_text(item: Item):
    """POST /generate/: run generation for the request body and wrap the reply."""
    return {"response": generate(item)}


@app.get("/")
def read_root():
    """GET /: trivial liveness probe."""
    # NOTE(review): "Worlds" looks like a typo for "World", but it is a
    # runtime string callers may match on — kept byte-identical.
    return {"Hello": "Worlds"}