ppsingh committed
Commit 9392032 · verified · Parent: af8bf50

Update app.py

Files changed (1):
  app.py +6 -28
app.py CHANGED
@@ -19,41 +19,19 @@ st.set_page_config(page_title="SEARCH IATI",layout='wide')
 st.title("SEARCH IATI Database")
 var=st.text_input("enter keyword")
 
-
-def get_context(vectorstore,query):
-    # create metadata filter
-
-
-    # getting context
-    retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",
-                                         search_kwargs={"score_threshold": 0.5,
-                                                        "k": 10,})
-    # # re-ranking the retrieved results
-    # model = HuggingFaceCrossEncoder(model_name=model_config.get('ranker','MODEL'))
-    # compressor = CrossEncoderReranker(model=model, top_n=int(model_config.get('ranker','TOP_K')))
-    # compression_retriever = ContextualCompressionRetriever(
-    #     base_compressor=compressor, base_retriever=retriever
-    # )
-    context_retrieved = retriever.invoke(query)
-    print(f"retrieved paragraphs:{len(context_retrieved)}")
-
-    return context_retrieved
-
-# first we process and create the chunks for the relevant data source
+#################### Create the embeddings collection and save ######################
+# the steps below need to be performed only once, then commented out to avoid unnecessary compute overruns
+##### First we process and create the chunks for the relevant data source
 #chunks = process_giz_worldwide()
-# we convert to langchain documents
+##### Convert to langchain documents
 #temp_doc = create_documents(chunks,'chunks')
-# embed and store docs
+##### Embed and store docs; if the collection already exists, update it instead
 #hybrid_embed_chunks(docs= temp_doc, collection_name = "giz_worldwide")
 
+################### Hybrid Search ######################################################
 
-print("embedding done")
 
-# once the chunks are done, we perform hybrid embeddings
-#embed_chunks(chunks)
 
-#vectorstores = get_local_qdrant('giz_worldwide')
-#vectorstore = vectorstores['giz_worldwide']
 button=st.button("search")
 #found_docs = vectorstore.similarity_search(var)
 #print(found_docs)
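Note: process_giz_worldwide, create_documents, and hybrid_embed_chunks are project helpers that this diff does not show. As a rough sketch, the one-time embed-and-store step could look like the following with langchain_qdrant's hybrid mode (dense plus BM25 sparse vectors); the model names and the local storage path are illustrative assumptions, not the repository's actual configuration.

# Hypothetical stand-in for hybrid_embed_chunks(); the real helper is not in this diff.
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode

def hybrid_embed_chunks(docs, collection_name):
    # Dense embeddings for semantic similarity (model choice is an assumption)
    dense = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # Sparse BM25 embeddings so Qdrant can serve hybrid (dense + sparse) queries
    sparse = FastEmbedSparse(model_name="Qdrant/bm25")
    # Embed and store in one step; RetrievalMode.HYBRID writes both vector types
    return QdrantVectorStore.from_documents(
        docs,
        embedding=dense,
        sparse_embedding=sparse,
        retrieval_mode=RetrievalMode.HYBRID,
        collection_name=collection_name,
        path="./qdrant_local",  # local on-disk Qdrant; the path is an assumption
    )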
 
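The removed get_context shows the intended retrieval path, including a cross-encoder re-ranking stage that was itself commented out. Below is a minimal sketch of that path with the re-ranker enabled; the cross-encoder model name and top_n are assumptions, since the original read both from a model_config that this diff does not show.

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

def get_context(vectorstore, query):
    # First stage: vector retrieval, keeping up to 10 chunks scoring above 0.5
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5, "k": 10},
    )
    # Second stage: cross-encoder re-ranking of the retrieved chunks
    # (model name and top_n are assumptions; the original pulled both from model_config)
    model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
    compressor = CrossEncoderReranker(model=model, top_n=3)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
    )
    context_retrieved = compression_retriever.invoke(query)
    print(f"retrieved paragraphs: {len(context_retrieved)}")
    return context_retrieved

Wired into the app's Streamlit flow, following the commented-out lines left in app.py (get_local_qdrant is another project helper not shown here):

if button:
    # Hypothetical wiring; the helper name and dict key mirror the commented code
    vectorstores = get_local_qdrant('giz_worldwide')
    vectorstore = vectorstores['giz_worldwide']
    for doc in get_context(vectorstore, var):
        st.write(doc.page_content)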