File size: 2,791 Bytes
5bf3195
 
 
 
 
c8b8b02
 
5bf3195
75a550e
5bf3195
c8b8b02
 
5bf3195
75a550e
5bf3195
c8b8b02
3dcc4e8
 
5bf3195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8b8b02
ac8d16a
3dcc4e8
5bf3195
 
 
 
c8b8b02
 
5bf3195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pinecone
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer

# Pinecone connection settings, read from Streamlit secrets.
# Both values are shown in the Pinecone console at app.pinecone.io.
PINECONE_KEY = st.secrets["PINECONE_API_KEY"]  # passed as api_key to pinecone.init
PINE_CONE_ENVIRONMENT = st.secrets["PINE_CONE_ENVIRONMENT"]  # passed as environment to pinecone.init

@st.cache_resource
def init_pinecone():
    """Initialise the Pinecone client and return a handle to the search index.

    Uses the module-level ``PINECONE_KEY`` and ``PINE_CONE_ENVIRONMENT``
    values (a free API key can be obtained from app.pinecone.io).

    Returns:
        A ``pinecone.Index`` bound to the "company-description" index.
    """
    pinecone.init(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
    # The name must match the one used by the reconnect path in run_query();
    # it was previously misspelled "dompany-description", which made the two
    # code paths point at different indexes.
    return pinecone.Index("company-description")
    
@st.cache_resource
def init_models():
    """Build the two models the app needs and return them as a pair.

    Returns:
        A ``(retriever, reader)`` tuple: a ``SentenceTransformer`` used to
        embed queries, and a transformers ``question-answering`` pipeline.
    """
    # An alternative retriever checkpoint, "multi-qa-MiniLM-L6-cos-v1", was
    # left commented out in an earlier revision of this function.
    checkpoint = "all-MiniLM-L6-v2"
    embedder = SentenceTransformer(checkpoint)
    # NOTE(review): the reader is created from the same checkpoint name as the
    # sentence-embedding retriever -- confirm this checkpoint is really meant
    # to serve as a question-answering model.
    qa_reader = pipeline(
        task='question-answering',
        model=checkpoint,
        tokenizer=checkpoint,
    )
    return embedder, qa_reader

# Keep the index handle in session state: run_query() reads it from there and
# overwrites it when it has to reconnect after a failed query.
st.session_state.index = init_pinecone()
# Module-level models used by run_query(): `retriever` encodes the query,
# `reader` extracts an answer from each matched context.
retriever, reader = init_models()


def card(title, context, score):
    """Render a single result as an HTML card via ``st.markdown``.

    All three values are interpolated into the markup as-is (no escaping),
    so ``context`` may carry inline HTML such as ``<mark>`` highlights.

    Args:
        title: Text shown in bold on the first line.
        context: Text shown in small grey type below the title.
        score: Value shown after the "Score:" label.

    Returns:
        Whatever ``st.markdown`` returns.
    """
    markup = f"""
    <div class="container-fluid">
        <div class="row align-items-start">
             <div  class="col-md-12 col-sm-12">
                 <b>{title}</b>
                 <br>
                 <span style="color: #808080;">
                     <small>{context}</small>
                     [<b>Score: </b>{score}]
                 </span>
             </div>
        </div>
     </div>
        """
    return st.markdown(markup, unsafe_allow_html=True)

# Page header: an empty title followed by the heading and prompt text.
st.title("")

st.write("""
# Extractive Question Answering
Ask me a question!
""")

# Load the Bootstrap stylesheet that supplies the grid classes used by card().
# The package spec in this URL had been mangled by e-mail obfuscation; the
# version is restored to 4.0.0, which is the release the integrity hash below
# belongs to (a mismatching URL/hash pair makes the browser reject the file).
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)

def run_query(query):
    """Answer *query* from the Pinecone index and render one card per match.

    The query is encoded with the module-level ``retriever``, the nearest
    matches are fetched from ``st.session_state.index``, the module-level
    ``reader`` extracts an answer from each match's ``context`` metadata, and
    the answers are rendered with ``card`` in descending reader-score order.

    Args:
        query: The question text entered by the user.
    """
    xq = retriever.encode([query]).tolist()
    try:
        xc = st.session_state.index.query(xq, top_k=3, include_metadata=True)
    except Exception:
        # Best-effort recovery: re-initialise the client, rebuild the index
        # handle and retry once. ``Exception`` (rather than a bare except)
        # lets KeyboardInterrupt/SystemExit propagate.
        pinecone.init(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
        st.session_state.index = pinecone.Index("company-description")
        # NOTE(review): the retry requests 10 matches while the first attempt
        # requests 3 -- confirm which value is intended.
        xc = st.session_state.index.query(xq, top_k=10, include_metadata=True)

    results = []
    for match in xc['matches']:
        metadata = match["metadata"]
        answer = reader(question=query, context=metadata['context'])
        answer["title"] = metadata['name']
        # Keep the passage the answer was extracted from so it can be shown
        # with the answer highlighted (this used to store the name again).
        answer["context"] = metadata['context']
        results.append(answer)

    sorted_result = sorted(results, key=lambda x: x['score'], reverse=True)

    for r in sorted_result:
        answer = r["answer"]
        context = r["context"]
        # str.replace with an empty needle would insert the <mark> tags
        # between every character, so only highlight non-empty answers.
        if answer:
            context = context.replace(answer, f"<mark>{answer}</mark>")
        title = r["title"].replace("_", " ")
        score = round(r["score"], 4)
        card(title, context, score)

# Search box; its value is an empty string until the user types something.
query = st.text_input("Search!", "")

# Skip the lookup while the box is still empty.
if query:
    run_query(query)