Update app.py
app.py CHANGED
@@ -14,15 +14,22 @@ async def root():
 @app.post("/hello/")
 def say_hello(msg: Annotated[str, Form()]):
     print("model")
-
-
-
-
-
-    )
-
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    checkpoint = "HuggingFaceTB/SmolLM-1.7B-Instruct"
+
+    device = "cpu" # for GPU usage or "cpu" for CPU usage
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
+    model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+
+    messages = [{"role": "user", "content": "things about elasticsearch"}]
+    input_text=tokenizer.apply_chat_template(messages, tokenize=False)
+    print(input_text)
+
     input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
     print("output")
-
+    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+    outputs = model.generate(inputs, max_new_tokens=256, temperature=0.6, top_p=0.92, do_sample=True)
+
     print("complete")
     return {"message": tokenizer.decode(outputs[0])}
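The committed handler has two quirks worth noting: it imports and loads the 1.7B checkpoint inside the request handler, so every POST pays the full model-load cost, and it generates from the hardcoded "things about elasticsearch" prompt while the submitted msg (tokenized into the unused input_ids) never reaches the model. Below is a minimal sketch of a restructured app.py that loads the model once at startup and feeds msg through the chat template; the FastAPI scaffolding, add_generation_prompt=True, and the prompt-stripping decode are assumptions, not part of the diff.

from typing import Annotated

from fastapi import FastAPI, Form
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

checkpoint = "HuggingFaceTB/SmolLM-1.7B-Instruct"
device = "cpu"  # the Space runs on CPU; use "cuda" on GPU hardware

# Load once at import time; doing this inside the handler reloads the
# 1.7B model on every request.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

@app.post("/hello/")
def say_hello(msg: Annotated[str, Form()]):
    # Wrap the submitted message in the model's chat template (assumption:
    # add_generation_prompt=True so the prompt ends with the assistant turn).
    messages = [{"role": "user", "content": msg}]
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(
        inputs, max_new_tokens=256, temperature=0.6, top_p=0.92, do_sample=True
    )

    # Decode only the newly generated tokens so the prompt is not echoed back.
    reply = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
    return {"message": reply}

Assuming the Space runs uvicorn on the default port, the endpoint can be exercised with:

curl -X POST -F "msg=things about elasticsearch" http://localhost:8000/hello/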