File size: 1,234 Bytes
50cc7b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import solara as sol
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Spanish GPT-2 checkpoint used for next-token prediction.
model_name = "datificate/gpt2-small-spanish"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Inference-only usage: eval() disables dropout so repeated calls on the
# same prompt yield deterministic probabilities (from_pretrained leaves
# the module in training mode by default).
model.eval()

def predict_next_token(text, top_k=10):
    """Return the most likely next-token candidates for *text*.

    Parameters
    ----------
    text : str
        Prompt to continue (Spanish, per the loaded checkpoint).
    top_k : int
        Number of candidates to return. Defaults to 10, matching the
        previously hard-coded value.

    Returns
    -------
    list[tuple[str, float]]
        ``(token, probability)`` pairs, most likely first. Empty list
        for empty input.
    """
    # Guard: tokenizing "" produces an empty input_ids tensor, and the
    # [:, -1, :] index below would raise — short-circuit instead.
    if not text:
        return []
    inputs = tokenizer(text, return_tensors="pt")
    # Inference only — no_grad skips building the autograd graph,
    # saving memory and time on every keystroke.
    with torch.no_grad():
        outputs = model(**inputs)
    # Logits for the position after the last input token.
    next_token_logits = outputs.logits[:, -1, :]
    next_token_probs = torch.softmax(next_token_logits, dim=-1)
    top_k_probs, top_k_indices = torch.topk(next_token_probs, top_k)
    top_k_tokens = tokenizer.convert_ids_to_tokens(top_k_indices[0])
    return list(zip(top_k_tokens, top_k_probs[0].tolist()))

@sol.component
def NextTokenPredictionApp():
    """Solara page: shows live next-token predictions for Spanish text.

    Re-renders on every edit of the input box, recomputing the top-10
    candidates via ``predict_next_token``.
    """
    # solara.use_state returns a (value, setter) tuple, not an object
    # with .value/.set attributes — the original access pattern would
    # raise AttributeError on first render.
    text, set_text = sol.use_state("")
    predictions, set_predictions = sol.use_state([])

    def on_text_change(new_text):
        # Store the new input, then recompute predictions for it
        # (use new_text directly: the state setter is asynchronous
        # with respect to this callback).
        set_text(new_text)
        set_predictions(predict_next_token(new_text))

    sol.InputText(value=text, on_change=on_text_change, placeholder="Escribe algo en español...")

    if predictions:
        sol.Markdown("### Predicciones de tokens:")
        for token, prob in predictions:
            sol.Markdown(f"- {token}: {prob:.4f}")

sol.run(NextTokenPredictionApp)