from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from accelerate import init_empty_weights, load_checkpoint_and_dispatch, infer_auto_device_map
import streamlit as st
from huggingface_hub import login, snapshot_download
import pandas as pd

# Hugging Face secret token
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
login(huggingface_token)
# Load the tokenizer
model_id = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

MAX_INPUT_TOKEN_LENGTH = 10000

# Load the model with disk offload: build an empty-weight skeleton from the
# config, infer a device map that spills to disk, then dispatch the checkpoint.
config = AutoConfig.from_pretrained(model_id)
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)
# The memory budgets must cover the model's footprint (~5 GB in fp32 for a
# 1B-parameter model); adjust to the hardware actually available.
device_map = infer_auto_device_map(model, max_memory={"cpu": "1GiB", "disk": "6GiB"}, no_split_module_classes=["LlamaDecoderLayer"])
# load_checkpoint_and_dispatch expects a local checkpoint path, so download
# the repo files first rather than passing the hub id directly.
checkpoint_path = snapshot_download(model_id)
model = load_checkpoint_and_dispatch(model, checkpoint_path, device_map=device_map, offload_folder="offload_dir")
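# Alternative (untested sketch): transformers can handle the same offloading
# in a single call, e.g.
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id, device_map="auto", offload_folder="offload_dir"
#   )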
def generate_response(input_text, temperature=0.7, max_new_tokens=20):
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to("cpu")  # keep inputs on CPU to match the offloaded model
    # Truncate over-long prompts to the last MAX_INPUT_TOKEN_LENGTH tokens
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        st.warning(f"The input was truncated because it exceeded the limit of {MAX_INPUT_TOKEN_LENGTH} tokens.")
    streamer = TextIteratorStreamer(tokenizer, timeout=120.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=20,
        top_p=0.9,
        temperature=temperature,
        num_return_sequences=1,  # the text streamer only supports batch size 1
        eos_token_id=tokenizer.eos_token_id,
    )
    try:
        outputs = model.generate(**generate_kwargs)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        return response.split("\n")[0]
    except Exception as e:
        st.error(f"Error during generation: {e}")
        return "Error generating text."
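# Illustrative helper (not part of the original app): the in-context prompt
# below asks the model to "calculate the cosine similarity", which a 1B base
# model cannot reliably compute. A minimal sketch of computing the score
# directly, assuming scikit-learn is installed:
def job_title_similarity(job_title: str, query: str) -> float:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    # Vectorize both strings with TF-IDF and compare the two row vectors
    vectors = TfidfVectorizer().fit_transform([job_title, query])
    return float(cosine_similarity(vectors[0], vectors[1])[0][0])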
def main():
    st.title("Chat with Meta Llama 3.2 1B")
    uploaded_file = st.file_uploader("Please upload a CSV file to start:", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        query = 'aspiring human resources specialist'
        if 'job_title' in df.columns:
            job_titles = df['job_title'].tolist()
            # Build the prompt with in-context learning
            initial_prompt = (
                f"Extract the first record from the dataframe df.\n"
                f"First job title: '{df.iloc[0]['job_title']}'\n"
                f"Calculate the cosine similarity between this job title and the query: '{query}'.\n"
                "Print the cosine similarity score."
            )
            st.write("Initial prompt with in-context learning:\n")
            st.write(initial_prompt)
            if st.button("Generate response"):
                with st.spinner("Generating response..."):
                    response = generate_response(initial_prompt, temperature=0.5)
                if response:
                    st.write(f"Model response: {response}")
                else:
                    st.warning("A response could not be generated.")
                st.success("The conversation has ended.")
                if st.button("Start a new conversation"):
                    st.rerun()  # st.experimental_rerun() is removed in recent Streamlit releases
                elif st.button("End"):
                    st.stop()
        else:
            st.error("The 'job_title' column was not found in the CSV file.")

if __name__ == "__main__":
    main()
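# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py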