from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from accelerate import init_empty_weights, load_checkpoint_and_dispatch, infer_auto_device_map
import streamlit as st
from huggingface_hub import login, snapshot_download
import pandas as pd

# Hugging Face access token (stored as a Streamlit secret)
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
login(huggingface_token)

# Load the tokenizer; the model itself is built below with disk offload,
# so a redundant full from_pretrained load is avoided
model_id = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
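# Note: meta-llama/Llama-3.2-1B is a gated repository on the Hugging Face Hub,
# so the login above must use a token whose account has been granted access.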

# Build the model on the meta device, then dispatch its weights with disk offload
config = AutoConfig.from_pretrained(model_id)
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)

# load_checkpoint_and_dispatch expects a local checkpoint path, so the
# repository is first materialized on disk with snapshot_download
checkpoint_path = snapshot_download(model_id)
device_map = infer_auto_device_map(model, max_memory={"disk": "2GiB"}, no_split_module_classes=["LlamaDecoderLayer"])
model = load_checkpoint_and_dispatch(model, checkpoint_path, device_map=device_map, offload_folder="offload_dir")

# Maximum prompt length in tokens; longer inputs are truncated
MAX_INPUT_TOKEN_LENGTH = 10000
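# Note: with only a "disk" budget in max_memory, infer_auto_device_map sends
# every layer to disk, which is slow. Adding a "cpu" entry, e.g.
# max_memory={"cpu": "4GiB", "disk": "2GiB"}, would keep as many layers as fit
# in RAM and spill only the rest; the 4GiB figure here is illustrative, not a
# measured requirement.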


def generate_response(input_text, temperature=0.7, max_new_tokens=20):
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to("cpu")  # keep inputs on CPU for the offloaded model

    # Truncate overly long prompts, keeping the most recent tokens
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        st.warning(f"Input was truncated because it exceeded the {MAX_INPUT_TOKEN_LENGTH}-token limit.")

    # The output is decoded only after generation completes, so no streamer is
    # used here; TextIteratorStreamer also rejects batch sizes > 1, which
    # num_return_sequences=3 would produce.
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=20,
        top_p=0.9,
        temperature=temperature,
        num_return_sequences=3,
        eos_token_id=tokenizer.eos_token_id
    )

    try:
        outputs = model.generate(**generate_kwargs)
        # Decode only the first of the returned sequences, first line only
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        return response.split("\n")[0]
    except Exception as e:
        st.error(f"Error during generation: {e}")
        return "Text generation failed."

def main():
    st.title("Chat with Meta Llama 3.2 1B")

    uploaded_file = st.file_uploader("Please upload a CSV file to start:", type=["csv"])

    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        query = 'aspiring human resources specialist'  # hardcoded example query

        if 'job_title' in df.columns:
            job_titles = df['job_title'].tolist()  # all job titles from the CSV

            # Build the prompt with in-context learning
            initial_prompt = (
                f"Extract the first record from the dataframe df.\n"
                f"First job title: '{df.iloc[0]['job_title']}'\n"
                f"Calculate the cosine similarity between this job title and the query: '{query}'.\n"
                "Print the cosine similarity score."
            )

            st.write("Initial prompt with in-context learning:\n")
            st.write(initial_prompt)

            if st.button("Generate response"):
                with st.spinner("Generating response..."):
                    response = generate_response(initial_prompt, temperature=0.5)
                    if response:
                        st.write(f"Model response: {response}")
                    else:
                        st.warning("No response could be generated.")

                st.success("The conversation has ended.")

            # These buttons sit outside the generate handler: widgets created
            # inside a button's block disappear on the next rerun, so their
            # clicks would never register there.
            if st.button("Start a new conversation"):
                st.rerun()
            if st.button("End"):
                st.stop()
        else:
            st.error("The 'job_title' column was not found in the CSV file.")

if __name__ == "__main__":
    main()
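
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py
# with the token in .streamlit/secrets.toml:
#   HUGGINGFACEHUB_API_TOKEN = "hf_..."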