File size: 1,822 Bytes
e6ec663
 
 
e919c00
e6ec663
7cb44eb
 
e6ec663
e919c00
e6ec663
 
 
 
 
17854c9
e6ec663
 
 
 
 
 
e919c00
 
e6ec663
 
 
 
e919c00
e6ec663
 
e919c00
e6ec663
 
 
e919c00
 
e6ec663
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import ctranslate2
import transformers
from huggingface_hub import snapshot_download

# Fetch the int8 CTranslate2 conversion of Mistral-7B-Instruct from the Hub;
# snapshot_download returns the local directory holding the model files.
model_dir = snapshot_download(
    repo_id="Praise2112/Mistral-7B-Instruct-v0.1-int8-ct2",
)

# CPU inference. For GPU inference, build the generator with device="cuda".
generator = ctranslate2.Generator(
    model_dir,
    device="cpu",
    compute_type="int8",
)

# The tokenizer (and its chat template) comes from the original model repo.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
)

# Conversation to continue; the last user turn is the one being answered.
messages = [
    {
        "role": "user",
        "content": "What is your favourite condiment?",
    },
    {
        "role": "assistant",
        "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!",
    },
    {
        "role": "user",
        "content": "Do you have mayonnaise recipes?",
    },
]

# generate_batch is fed token strings here, so each row of ids produced by
# the chat template is mapped back to its tokens.
model_inputs = [
    tokenizer.convert_ids_to_tokens(id_row)
    for id_row in tokenizer.apply_chat_template(messages, return_tensors="pt")
]

# Top-k sampling over the 10 most likely tokens, capped at 1000 tokens.
generated_ids = generator.generate_batch(
    model_inputs,
    max_length=1000,
    sampling_topk=10,
)

# Keep the first hypothesis of each batch entry, then turn ids into text.
decoded = tokenizer.batch_decode(
    [result.sequences_ids[0] for result in generated_ids]
)
print(decoded[0])


# def speak(prompt):
#     # Tokenize the prompt into PyTorch tensors, then move them to the specified device
#     model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
#     model.to(device)

#     # Generate text conditioned on the prompt using the model
#     generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)

#     # Decode the generated ids back into text and return the first sequence
#     resulting_text = tokenizer.batch_decode(generated_ids)[0]
#     return resulting_text


# iface = gr.Interface(fn=speak, inputs="text", outputs="text")
# iface.launch()