JaphetHernandez committed on
Commit
2c1b805
verified
1 Parent(s): e373768

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -31
app.py CHANGED
@@ -1,24 +1,25 @@
1
  import pandas as pd
2
  import streamlit as st
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
4
  from langchain.llms import HuggingFacePipeline
 
 
5
  from huggingface_hub import login
6
 
7
- # Token de Hugging Face (Secreto)
8
- huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
9
- login(huggingface_token)
10
 
11
- # Cargar el modelo Llama 3.1 y el tokenizador
12
  model_id = "meta-llama/Llama-3.1-1B"
13
  tokenizer = AutoTokenizer.from_pretrained(model_id)
14
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
15
 
16
- # Crear pipeline de generación de texto usando TextGenerationPipeline
17
- pipe = TextGenerationPipeline(model=model, tokenizer=tokenizer, device=0, max_length=1024)
18
  llm_pipeline = HuggingFacePipeline(pipeline=pipe)
19
 
20
  # Interfaz de Streamlit
21
- st.title("Cosine Similarity Simulation with Llama 3.1")
22
 
23
  # Subir archivo CSV
24
  uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
@@ -31,33 +32,39 @@ if uploaded_file is not None:
31
  query = 'aspiring human resources specialist'
32
  job_titles = df['job_title'].tolist()
33
 
34
- # Definir el prompt para simular la similitud de coseno
35
- prompt = (
36
- f"You are an AI model trained to calculate semantic similarity using cosine similarity scores. "
37
- f"The query is: '{query}'. You will compare this query to a list of job titles and estimate the cosine similarity score "
38
- f"based on the semantic meaning. For each job title, assign a similarity score between 0 and 1. "
39
- f"Output the results in the following format:\n\n"
40
- f"1. Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
41
- f"2. Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
42
- f"...\n\n"
43
- f"Job Titles:\n"
44
- )
45
-
46
- # Agregar los títulos de trabajo al prompt
47
- for i, title in enumerate(job_titles, 1):
48
- prompt += f"{i}. {title}\n"
 
 
 
 
49
 
50
- # Mostrar el prompt en la interfaz
51
- st.write("Prompt enviado al LLM:")
52
- st.write(prompt)
 
 
53
 
54
- # Generar respuesta del LLM
55
- if st.button("Generar puntajes de similitud"):
56
- with st.spinner("Calculando similitudes con Llama 3.1..."):
57
  try:
58
- response = llm_pipeline(prompt)[0]['generated_text']
59
  st.write("Respuesta del modelo:")
60
- st.write(response)
61
 
62
  # Simular la asignación de puntajes en la columna 'Score' (basado en la respuesta del modelo)
63
  df['Score'] = [0.95] * len(df) # Simulación para la demostración
 
1
  import pandas as pd
2
  import streamlit as st
 
3
  from langchain.llms import HuggingFacePipeline
4
+ from langchain import PromptTemplate, LLMChain
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
6
  from huggingface_hub import login
7
 
8
+ # Iniciar sesión en Hugging Face con Fireworks API Key
9
+ fireworks_token = st.secrets["FIREWORKS_API_KEY"]
10
+ login(fireworks_token)
11
 
12
+ # Configurar modelo Llama 3.1
13
  model_id = "meta-llama/Llama-3.1-1B"
14
  tokenizer = AutoTokenizer.from_pretrained(model_id)
15
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
16
 
17
+ # Crear pipeline con Fireworks
18
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=1024)
19
  llm_pipeline = HuggingFacePipeline(pipeline=pipe)
20
 
21
  # Interfaz de Streamlit
22
+ st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")
23
 
24
  # Subir archivo CSV
25
  uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
 
32
  query = 'aspiring human resources specialist'
33
  job_titles = df['job_title'].tolist()
34
 
35
+ # Definir el prompt para usar Fireworks para cálculo de similitud de coseno
36
+ # Crear el prompt mejorado para Fireworks
37
+ prompt_template = PromptTemplate(
38
+ template=(
39
+ "You are an AI model with access to external embeddings services. Your task is to calculate the cosine similarity "
40
+ "between a given query and a list of job titles using embeddings obtained from an external service. "
41
+ "Follow these steps to complete the task:\n\n"
42
+ "1. Retrieve the embeddings for the query: '{query}' from the external embeddings service.\n"
43
+ "2. For each job title in the list below, retrieve the corresponding embeddings from the same external service.\n"
44
+ "3. Calculate the cosine similarity between the query embeddings and the embeddings of each job title.\n"
45
+ "4. Return the results in the following format:\n"
46
+ " - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
47
+ " - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
48
+ " ...\n\n"
49
+ "The list of job titles is:\n{job_titles}\n\n"
50
+ "Remember to access the embeddings service directly and ensure that the cosine similarity scores are calculated accurately based on the semantic similarity between the embeddings."
51
+ ),
52
+ input_variables=["query", "job_titles"]
53
+ )
54
 
55
+ # Crear el LLMChain para manejar la interacci贸n con Fireworks
56
+ llm_chain = LLMChain(
57
+ llm=llm_pipeline,
58
+ prompt=prompt_template
59
+ )
60
 
61
+ # Ejecutar la generación con el LLM
62
+ if st.button("Calcular Similitud de Coseno"):
63
+ with st.spinner("Calculando similitudes con Fireworks y Llama 3.1..."):
64
  try:
65
+ result = llm_chain.run({"query": query, "job_titles": job_titles})
66
  st.write("Respuesta del modelo:")
67
+ st.write(result)
68
 
69
  # Simular la asignación de puntajes en la columna 'Score' (basado en la respuesta del modelo)
70
  df['Score'] = [0.95] * len(df) # Simulación para la demostración