File size: 1,742 Bytes
77be14e
50cc7b6
77be14e
 
 
 
2faef85
 
 
 
 
 
 
77be14e
 
2faef85
 
 
 
 
 
 
 
 
98c78fa
2faef85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98c78fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import solara
import torch
import torch.nn.functional as F
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the causal language model and its tokenizer once, at import time; the
# `Page` component below reads these module-level instances on every render.
model_name = "datificate/gpt2-small-spanish"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reactive state holding the current prompt. It is bound to the input box in
# `Page` and is appended to when a predicted token is selected there.
text = solara.reactive("Escribe algo en español")

@solara.component
def Page():
    """Render the next-token prediction page.

    Shows an input box bound to the module-level reactive ``text``. While the
    text is non-empty, the model is asked to generate one new token and the
    ten highest-probability candidates for that step are listed in a table
    (probability, token ID, decoded token). Each row carries a "Seleccionar"
    cell action that appends that row's decoded token to ``text``.
    """
    with solara.Column(margin=10):
        solara.Markdown("# Predicción del Próximo Token")
        solara.Markdown("Ingrese un texto en español y vea las predicciones para el próximo token.")

        solara.InputText("Ingrese texto:", value=text, continuous_update=True)

        # Skip the prediction (and the table) while the prompt is empty.
        if text.value != "":
            tokens = tokenizer.encode(text.value, return_tensors="pt")
            outputs = model.generate(
                tokens,
                max_new_tokens=1,
                output_scores=True,
                return_dict_in_generate=True,
                pad_token_id=tokenizer.eos_token_id,
            )
            # Only one token is generated, so scores[0] is the only step.
            scores = F.softmax(outputs.scores[0], dim=-1)
            top_10 = torch.topk(scores, 10)

            # Convert once to plain Python values. The table and the click
            # handler share these lists, so a row index always maps to exactly
            # the token text shown in that row.
            top_probs = top_10.values[0].tolist()
            top_ids = top_10.indices[0].tolist()
            top_tokens = [tokenizer.decode([token_id]) for token_id in top_ids]

            # Defined inside the branch so it can never run without predictions
            # being available (it previously closed over a name that was only
            # bound when the prompt was non-empty).
            def on_action_cell(column, row_index):
                """Append the token displayed in the clicked row to the prompt."""
                text.value += top_tokens[row_index]

            cell_actions = [solara.CellAction(icon="mdi-thumb-up", name="Seleccionar", on_click=on_action_cell)]

            df = pd.DataFrame({
                "probs": [f"{prob:.2%}" for prob in top_probs],
                "next token ID": top_ids,
                "predicted next token": top_tokens,
            })

            solara.Markdown("### Predicción")
            solara.DataFrame(df, items_per_page=10, cell_actions=cell_actions)

# Call the component at module level; the returned value is not used here.
# NOTE(review): presumably kept so the page is displayed when this file is run
# as a notebook cell — confirm it is still needed for the deployed app.
Page()