semsearch / app.py
hanch's picture
change index name
ac8d16a
raw
history blame
2.79 kB
import pinecone
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer
PINECONE_KEY = st.secrets["PINECONE_API_KEY"] # app.pinecone.io
PINE_CONE_ENVIRONMENT = st.secrets["PINE_CONE_ENVIRONMENT"] # app.pinecone.io
@st.cache_resource
def init_pinecone():
pinecone.init(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT) # get a free api key from app.pinecone.io
return pinecone.Index("dompany-description")
@st.cache_resource
def init_models():
#retriever = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
retriever = SentenceTransformer("all-MiniLM-L6-v2")
model_name = "all-MiniLM-L6-v2"
reader = pipeline(tokenizer=model_name, model=model_name, task='question-answering')
return retriever, reader
st.session_state.index = init_pinecone()
retriever, reader = init_models()
def card(title, context, score):
return st.markdown(f"""
<div class="container-fluid">
<div class="row align-items-start">
<div class="col-md-12 col-sm-12">
<b>{title}</b>
<br>
<span style="color: #808080;">
<small>{context}</small>
[<b>Score: </b>{score}]
</span>
</div>
</div>
</div>
""", unsafe_allow_html=True)
st.title("")
st.write("""
# Extractive Question Answering
Ask me a question!
""")
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)
def run_query(query):
xq = retriever.encode([query]).tolist()
try:
xc = st.session_state.index.query(xq, top_k=3, include_metadata=True)
except:
# force reload
pinecone.init(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
st.session_state.index = pinecone.Index("company-description")
xc = st.session_state.index.query(xq, top_k=10, include_metadata=True)
results = []
for match in xc['matches']:
answer = reader(question=query, context=match["metadata"]['context'])
answer["title"] = match["metadata"]['name']
answer["context"] = match["metadata"]['name']
results.append(answer)
sorted_result = sorted(results, key=lambda x: x['score'], reverse=True)
for r in sorted_result:
answer = r["answer"]
context = r["context"].replace(answer, f"<mark>{answer}</mark>")
title = r["title"].replace("_", " ")
score = round(r["score"], 4)
card(title, context, score)
query = st.text_input("Search!", "")
if query != "":
run_query(query)