Update app.py
app.py
CHANGED
@@ -19,41 +19,19 @@ st.set_page_config(page_title="SEARCH IATI",layout='wide')
 st.title("SEARCH IATI Database")
 var=st.text_input("enter keyword")
 
-
-
-
-
-
-    # getting context
-    retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",
-                                         search_kwargs={"score_threshold": 0.5,
-                                                        "k": 10,})
-    # # re-ranking the retrieved results
-    # model = HuggingFaceCrossEncoder(model_name=model_config.get('ranker','MODEL'))
-    # compressor = CrossEncoderReranker(model=model, top_n=int(model_config.get('ranker','TOP_K')))
-    # compression_retriever = ContextualCompressionRetriever(
-    #     base_compressor=compressor, base_retriever=retriever
-    # )
-    context_retrieved = retriever.invoke(query)
-    print(f"retrieved paragraphs:{len(context_retrieved)}")
-
-    return context_retrieved
-
-# first we process and create the chunks for relvant data source
+#################### Create the embeddings collection and save ######################
+# the steps below need to be performed only once, then commented out to avoid unnecessary compute overruns
+##### First we process and create the chunks for the relevant data source
 #chunks = process_giz_worldwide()
-
+##### Convert to langchain documents
 #temp_doc = create_documents(chunks,'chunks')
-
+##### Embed and store docs; if the collection already exists, update it instead
 #hybrid_embed_chunks(docs= temp_doc, collection_name = "giz_worldwide")
 
+################### Hybrid Search ######################################################
 
-print("embedding done")
 
-# once the chunks are done, we perform hybrid emebddings
-#embed_chunks(chunks)
 
-#vectorstores = get_local_qdrant('giz_worldwide')
-#vectorstore = vectorstores['giz_worldwide']
 button=st.button("search")
 #found_docs = vectorstore.similarity_search(var)
 #print(found_docs)
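For context, the block this commit deletes was the LangChain retrieval step. A minimal runnable sketch of that step, assuming vectorstore is an already-populated LangChain vector store (e.g. Qdrant) and reusing the threshold and k values from the deleted lines:

def get_context(vectorstore, query):
    # build a retriever that keeps only hits scoring above 0.5,
    # returning at most 10 of them
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5, "k": 10},
    )
    context_retrieved = retriever.invoke(query)
    print(f"retrieved paragraphs: {len(context_retrieved)}")
    return context_retrieved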
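The deleted block also carried a commented-out cross-encoder re-ranking stage. A hedged sketch of that stage if it were enabled, reusing retriever and query from the sketch above; the model name and top_n are illustrative stand-ins for the values the original read from model_config:

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

# wrap the base retriever so retrieved paragraphs are re-scored by a
# cross-encoder and only the best top_n survive
model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")  # illustrative model
compressor = CrossEncoderReranker(model=model, top_n=5)  # illustrative top_n
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)
context_reranked = compression_retriever.invoke(query)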
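The new comment headers describe a one-time indexing phase: chunk the data source, convert the chunks to langchain documents, then embed and store them. Uncommented, that phase would run roughly as below; process_giz_worldwide, create_documents and hybrid_embed_chunks are the repo's own helpers whose import paths are not visible in this diff:

# run once to build the "giz_worldwide" collection, then comment out again
# so the embeddings are not recomputed on every app start
chunks = process_giz_worldwide()
temp_doc = create_documents(chunks, 'chunks')
hybrid_embed_chunks(docs=temp_doc, collection_name="giz_worldwide")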
app.py after the change (lines 19-37):

st.title("SEARCH IATI Database")
var=st.text_input("enter keyword")

#################### Create the embeddings collection and save ######################
# the steps below need to be performed only once, then commented out to avoid unnecessary compute overruns
##### First we process and create the chunks for the relevant data source
#chunks = process_giz_worldwide()
##### Convert to langchain documents
#temp_doc = create_documents(chunks,'chunks')
##### Embed and store docs; if the collection already exists, update it instead
#hybrid_embed_chunks(docs= temp_doc, collection_name = "giz_worldwide")

################### Hybrid Search ######################################################


button=st.button("search")
#found_docs = vectorstore.similarity_search(var)
#print(found_docs)
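On the search side, the commented lines kept at the bottom of the file hint at how a query runs once the collection exists. A hedged sketch wiring them to the Streamlit widgets; get_local_qdrant is the repo's own helper (it appears to return a dict of collections, judging by the deleted lines), and triggering the search from the button is an assumption:

import streamlit as st

st.title("SEARCH IATI Database")
var = st.text_input("enter keyword")
button = st.button("search")

if button and var:
    # load the stored Qdrant collection and run a plain similarity search
    vectorstores = get_local_qdrant('giz_worldwide')
    vectorstore = vectorstores['giz_worldwide']
    found_docs = vectorstore.similarity_search(var)
    st.write(found_docs)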
|