File size: 1,565 Bytes
50cc7b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad05766
 
50cc7b6
 
 
 
 
 
52a1159
 
 
 
 
50cc7b6
 
52a1159
50cc7b6
52a1159
50cc7b6
ad05766
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import solara as sol
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Spanish GPT-2 checkpoint from the Hugging Face hub; downloaded/cached on
# first run. Loaded once at import time and shared by all requests.
model_name = "datificate/gpt2-small-spanish"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def predict_next_token(text, top_k=10):
    """Return the model's most likely next tokens for *text*.

    Args:
        text: Input string (Spanish, per the loaded checkpoint).
        top_k: Number of candidate tokens to return (default 10, matching
            the original hard-coded behavior).

    Returns:
        List of ``(token, probability)`` pairs, highest probability first.
        Empty list for blank input (an empty sequence would otherwise crash
        the ``logits[:, -1, :]`` indexing).
    """
    if not text or not text.strip():
        return []
    inputs = tokenizer(text, return_tensors="pt")
    # Pure inference: disable autograd to avoid building a gradient graph
    # (saves memory and compute on every keystroke).
    with torch.no_grad():
        outputs = model(**inputs)
    # Logits for the position following the last input token.
    next_token_logits = outputs.logits[:, -1, :]
    next_token_probs = torch.softmax(next_token_logits, dim=-1)
    top_k_probs, top_k_indices = torch.topk(next_token_probs, top_k)
    top_k_tokens = tokenizer.convert_ids_to_tokens(top_k_indices[0])
    return list(zip(top_k_tokens, top_k_probs[0].tolist()))

@sol.component
def NextTokenPredictionApp():
    """Solara page: type Spanish text and view the model's top next-token
    predictions with their probabilities."""
    # use_reactive ties the state to the component instance across
    # re-renders. The original sol.reactive("") inside the component body
    # created a *fresh* reactive object on every render, so the state was
    # reset each time the component re-rendered.
    text = sol.use_reactive("")
    predictions = sol.use_reactive([])

    def on_text_change(new_text):
        # Update the stored text, then recompute predictions for it.
        text.set(new_text)
        predictions.set(predict_next_token(new_text))

    sol.Markdown("# Predicción del Próximo Token")
    sol.Markdown("Ingrese un texto en español y vea las predicciones para el próximo token.")

    # NOTE(review): solara's documented InputText signature uses `label` and
    # `on_value`; `on_change`, `placeholder` and `fullwidth` do not appear in
    # its API docs — confirm against the installed solara version.
    sol.InputText(value=text.value, on_change=on_text_change, placeholder="Escribe algo en español...", fullwidth=True)
    sol.Button("Predecir", on_click=lambda: on_text_change(text.value))

    if predictions.value:
        sol.Markdown("## Predicciones de tokens:")
        for token, prob in predictions.value:
            sol.Markdown(f"- **{token}**: {prob:.4f}")

# Start the application (development mode).
# NOTE(review): `sol.App` is not part of solara's documented public API —
# solara normally auto-discovers a module-level component named `Page`
# (e.g. `Page = NextTokenPredictionApp`) when run via `solara run`.
# Confirm this line works with the installed solara version.
app = sol.App(NextTokenPredictionApp, title="Next Token Prediction App")