Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,24 +1,25 @@
|
|
1 |
import pandas as pd
|
2 |
import streamlit as st
|
3 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
|
4 |
from langchain.llms import HuggingFacePipeline
|
|
|
|
|
5 |
from huggingface_hub import login
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
login(
|
10 |
|
11 |
-
#
|
12 |
model_id = "meta-llama/Llama-3.1-1B"
|
13 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
14 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
|
15 |
|
16 |
-
# Crear pipeline
|
17 |
-
pipe =
|
18 |
llm_pipeline = HuggingFacePipeline(pipeline=pipe)
|
19 |
|
20 |
# Interfaz de Streamlit
|
21 |
-
st.title("Cosine Similarity
|
22 |
|
23 |
# Subir archivo CSV
|
24 |
uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
|
@@ -31,33 +32,39 @@ if uploaded_file is not None:
|
|
31 |
query = 'aspiring human resources specialist'
|
32 |
job_titles = df['job_title'].tolist()
|
33 |
|
34 |
-
# Definir el prompt para
|
35 |
-
prompt
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
-
|
|
|
|
|
53 |
|
54 |
-
#
|
55 |
-
if st.button("
|
56 |
-
with st.spinner("Calculando similitudes con Llama 3.1..."):
|
57 |
try:
|
58 |
-
|
59 |
st.write("Respuesta del modelo:")
|
60 |
-
st.write(
|
61 |
|
62 |
# Simular la asignación de puntajes en la columna 'Score' (basado en la respuesta del modelo)
|
63 |
df['Score'] = [0.95] * len(df) # Simulación para la demostración
|
|
|
1 |
import pandas as pd
|
2 |
import streamlit as st
|
|
|
3 |
from langchain.llms import HuggingFacePipeline
|
4 |
+
from langchain import PromptTemplate, LLMChain
|
5 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
6 |
from huggingface_hub import login
|
7 |
|
8 |
+
# Iniciar sesión en Hugging Face con Fireworks API Key
|
9 |
+
fireworks_token = st.secrets["FIREWORKS_API_KEY"]
|
10 |
+
login(fireworks_token)
|
11 |
|
12 |
+
# Configurar modelo Llama 3.1
|
13 |
model_id = "meta-llama/Llama-3.1-1B"
|
14 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
15 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
|
16 |
|
17 |
+
# Crear pipeline con Fireworks
|
18 |
+
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=1024)
|
19 |
llm_pipeline = HuggingFacePipeline(pipeline=pipe)
|
20 |
|
21 |
# Interfaz de Streamlit
|
22 |
+
st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")
|
23 |
|
24 |
# Subir archivo CSV
|
25 |
uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
|
|
|
32 |
query = 'aspiring human resources specialist'
|
33 |
job_titles = df['job_title'].tolist()
|
34 |
|
35 |
+
# Definir el prompt para usar Fireworks para cálculo de similitud de coseno
|
36 |
+
# Crear el prompt mejorado para Fireworks
|
37 |
+
prompt_template = PromptTemplate(
|
38 |
+
template=(
|
39 |
+
"You are an AI model with access to external embeddings services. Your task is to calculate the cosine similarity "
|
40 |
+
"between a given query and a list of job titles using embeddings obtained from an external service. "
|
41 |
+
"Follow these steps to complete the task:\n\n"
|
42 |
+
"1. Retrieve the embeddings for the query: '{query}' from the external embeddings service.\n"
|
43 |
+
"2. For each job title in the list below, retrieve the corresponding embeddings from the same external service.\n"
|
44 |
+
"3. Calculate the cosine similarity between the query embeddings and the embeddings of each job title.\n"
|
45 |
+
"4. Return the results in the following format:\n"
|
46 |
+
" - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
|
47 |
+
" - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
|
48 |
+
" ...\n\n"
|
49 |
+
"The list of job titles is:\n{job_titles}\n\n"
|
50 |
+
"Remember to access the embeddings service directly and ensure that the cosine similarity scores are calculated accurately based on the semantic similarity between the embeddings."
|
51 |
+
),
|
52 |
+
input_variables=["query", "job_titles"]
|
53 |
+
)
|
54 |
|
55 |
+
# Crear el LLMChain para manejar la interacción con Fireworks
|
56 |
+
llm_chain = LLMChain(
|
57 |
+
llm=llm_pipeline,
|
58 |
+
prompt=prompt_template
|
59 |
+
)
|
60 |
|
61 |
+
# Ejecutar la generación con el LLM
|
62 |
+
if st.button("Calcular Similitud de Coseno"):
|
63 |
+
with st.spinner("Calculando similitudes con Fireworks y Llama 3.1..."):
|
64 |
try:
|
65 |
+
result = llm_chain.run({"query": query, "job_titles": job_titles})
|
66 |
st.write("Respuesta del modelo:")
|
67 |
+
st.write(result)
|
68 |
|
69 |
# Simular la asignación de puntajes en la columna 'Score' (basado en la respuesta del modelo)
|
70 |
df['Score'] = [0.95] * len(df) # Simulación para la demostración
|