Spaces:

JaphetHernandez
/

Prueba_1

Sleeping

App Files Files Community

Prueba_1 / app.py

JaphetHernandez

Update app.py

d37a28d verified 9 months ago

raw

history blame

3.03 kB

	import pandas as pd
	import streamlit as st
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	from langchain.llms import HuggingFacePipeline
	from huggingface_hub import login

	# Hugging Face access token, read from Streamlit secrets (never hard-coded).
	huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]

	# Hub repository of the causal language model to load.
	# NOTE(review): confirm this repo id exists on the Hub -- the published 1B
	# checkpoint appears to belong to the Llama 3.2 family, not 3.1.
	model_id = "meta-llama/Llama-3.1-1B"


	@st.cache_resource
	def _load_generation_stack(repo_id, token):
	    """Authenticate against the Hub and build the text-generation stack once.

	    Streamlit re-executes this script from top to bottom on every widget
	    interaction. Without caching, the tokenizer and model weights would be
	    re-instantiated on each rerun; ``st.cache_resource`` keeps a single
	    shared instance alive for the lifetime of the server process.

	    Args:
	        repo_id: Hugging Face Hub model repository id.
	        token: Hugging Face access token used for ``login``.

	    Returns:
	        A tuple ``(tokenizer, model, pipe, llm_pipeline)`` where ``pipe`` is
	        the transformers text-generation pipeline and ``llm_pipeline`` is the
	        LangChain wrapper around it.
	    """
	    login(token)
	    loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id)
	    loaded_model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto")
	    # NOTE(review): per the transformers generation docs, max_length counts
	    # prompt tokens plus generated tokens; a long list of job titles can use
	    # up the whole budget. Consider max_new_tokens instead -- confirm.
	    text_pipe = pipeline(
	        "text-generation",
	        model=loaded_model,
	        tokenizer=loaded_tokenizer,
	        max_length=1024,
	    )
	    return loaded_tokenizer, loaded_model, text_pipe, HuggingFacePipeline(pipeline=text_pipe)


	# Same module-level names as before, now backed by the cached loader.
	tokenizer, model, pipe, llm_pipeline = _load_generation_stack(model_id, huggingface_token)

	# Streamlit user interface
	st.title("Cosine Similarity Simulation with Llama 3.1")

	# CSV upload widget; a value of None is treated as "nothing uploaded yet".
	uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])

	if uploaded_file is not None:
	    # Load the uploaded CSV into a DataFrame
	    df = pd.read_csv(uploaded_file)

	    if 'job_title' in df.columns:
	        query = 'aspiring human resources specialist'
	        job_titles = df['job_title'].tolist()

	        # Instruction header asking the model to emulate cosine-similarity scoring
	        prompt_header = (
	            f"You are an AI model trained to calculate semantic similarity using cosine similarity scores. "
	            f"The query is: '{query}'. You will compare this query to a list of job titles and estimate the cosine similarity score "
	            "based on the semantic meaning. For each job title, assign a similarity score between 0 and 1. "
	            "Output the results in the following format:\n\n"
	            "1. Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
	            "2. Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
	            "...\n\n"
	            "Job Titles:\n"
	        )

	        # Append the job titles as a 1-based numbered list, one per line
	        numbered_titles = "".join(
	            f"{position}. {title}\n" for position, title in enumerate(job_titles, 1)
	        )
	        prompt = prompt_header + numbered_titles

	        # Show the exact prompt that will be sent to the model
	        st.write("Prompt enviado al LLM:")
	        st.write(prompt)

	        # Only run generation when the user explicitly asks for it
	        if st.button("Generar puntajes de similitud"):
	            with st.spinner("Calculando similitudes con Llama 3.1..."):
	                try:
	                    generated = llm_pipeline(prompt)
	                    # A LangChain LLM wrapper returns the completion as a plain
	                    # string, so indexing it with [0]['generated_text'] raises
	                    # TypeError. Accept the raw transformers pipeline shape
	                    # (list of {'generated_text': ...}) as well.
	                    if isinstance(generated, str):
	                        response = generated
	                    else:
	                        response = generated[0]['generated_text']
	                    st.write("Respuesta del modelo:")
	                    st.write(response)

	                    # Placeholder scores for the demo: every row gets 0.95.
	                    # NOTE(review): the model response is displayed above but
	                    # is not parsed into this column.
	                    df['Score'] = [0.95] * len(df)

	                    # Show the DataFrame with the added 'Score' column
	                    st.write("DataFrame con los puntajes de similitud:")
	                    st.write(df)
	                except Exception as e:
	                    # UI boundary: surface any failure to the user instead of a traceback
	                    st.error(f"Error durante la generación: {e}")
	    else:
	        st.error("La columna 'job_title' no se encuentra en el archivo CSV.")