Spaces:

JaphetHernandez
/

Prueba_1

Sleeping

App Files Community

JaphetHernandez committed on Oct 27, 2024

Commit

2c1b805

verified ·

1 Parent(s): e373768

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -31

app.py CHANGED Viewed

@@ -1,24 +1,25 @@
 import pandas as pd
 import streamlit as st
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
 from langchain.llms import HuggingFacePipeline
 from huggingface_hub import login
-# Token de Hugging Face (Secreto)
-huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
-login(huggingface_token)
-# Cargar el modelo Llama 3.1 y el tokenizador
 model_id = "meta-llama/Llama-3.1-1B"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
-# Crear pipeline de generación de texto usando TextGenerationPipeline
-pipe = TextGenerationPipeline(model=model, tokenizer=tokenizer, device=0, max_length=1024)
 llm_pipeline = HuggingFacePipeline(pipeline=pipe)
 # Interfaz de Streamlit
-st.title("Cosine Similarity Simulation with Llama 3.1")
 # Subir archivo CSV
 uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
@@ -31,33 +32,39 @@ if uploaded_file is not None:
         query = 'aspiring human resources specialist'
         job_titles = df['job_title'].tolist()
-        # Definir el prompt para simular la similitud de coseno
-        prompt = (
-            f"You are an AI model trained to calculate semantic similarity using cosine similarity scores. "
-            f"The query is: '{query}'. You will compare this query to a list of job titles and estimate the cosine similarity score "
-            f"based on the semantic meaning. For each job title, assign a similarity score between 0 and 1. "
-            f"Output the results in the following format:\n\n"
-            f"1. Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
-            f"2. Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
-            f"...\n\n"
-            f"Job Titles:\n"
-        )
-        # Agregar los títulos de trabajo al prompt
-        for i, title in enumerate(job_titles, 1):
-            prompt += f"{i}. {title}\n"
-        # Mostrar el prompt en la interfaz
-        st.write("Prompt enviado al LLM:")
-        st.write(prompt)
-        # Generar respuesta del LLM
-        if st.button("Generar puntajes de similitud"):
-            with st.spinner("Calculando similitudes con Llama 3.1..."):
                 try:
-                    response = llm_pipeline(prompt)[0]['generated_text']
                     st.write("Respuesta del modelo:")
-                    st.write(response)
                     # Simular la asignación de puntajes en la columna 'Score' (basado en la respuesta del modelo)
                     df['Score'] = [0.95] * len(df)  # Simulación para la demostración

 import pandas as pd
 import streamlit as st
 from langchain.llms import HuggingFacePipeline
+from langchain import PromptTemplate, LLMChain
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from huggingface_hub import login
+# Iniciar sesión en Hugging Face con Fireworks API Key
+fireworks_token = st.secrets["FIREWORKS_API_KEY"]
+login(fireworks_token)
+# Configurar modelo Llama 3.1
 model_id = "meta-llama/Llama-3.1-1B"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+# Crear pipeline con Fireworks
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=1024)
 llm_pipeline = HuggingFacePipeline(pipeline=pipe)
 # Interfaz de Streamlit
+st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")
 # Subir archivo CSV
 uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
         query = 'aspiring human resources specialist'
         job_titles = df['job_title'].tolist()
+        # Definir el prompt para usar Fireworks para cálculo de similitud de coseno
+        # Crear el prompt mejorado para Fireworks
+        prompt_template = PromptTemplate(
+        template=(
+            "You are an AI model with access to external embeddings services. Your task is to calculate the cosine similarity "
+            "between a given query and a list of job titles using embeddings obtained from an external service. "
+            "Follow these steps to complete the task:\n\n"
+            "1. Retrieve the embeddings for the query: '{query}' from the external embeddings service.\n"
+            "2. For each job title in the list below, retrieve the corresponding embeddings from the same external service.\n"
+            "3. Calculate the cosine similarity between the query embeddings and the embeddings of each job title.\n"
+            "4. Return the results in the following format:\n"
+            "   - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
+            "   - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
+            "   ...\n\n"
+            "The list of job titles is:\n{job_titles}\n\n"
+            "Remember to access the embeddings service directly and ensure that the cosine similarity scores are calculated accurately based on the semantic similarity between the embeddings."
+        ),
+    input_variables=["query", "job_titles"]
+)
+        # Crear el LLMChain para manejar la interacción con Fireworks
+        llm_chain = LLMChain(
+            llm=llm_pipeline,
+            prompt=prompt_template
+        )
+        # Ejecutar la generación con el LLM
+        if st.button("Calcular Similitud de Coseno"):
+            with st.spinner("Calculando similitudes con Fireworks y Llama 3.1..."):
                 try:
+                    result = llm_chain.run({"query": query, "job_titles": job_titles})
                     st.write("Respuesta del modelo:")
+                    st.write(result)
                     # Simular la asignación de puntajes en la columna 'Score' (basado en la respuesta del modelo)
                     df['Score'] = [0.95] * len(df)  # Simulación para la demostración