"""Streamlit app for hybrid (semantic + lexical) search over the GIZ
worldwide IATI collection stored in a vector database.

Run once with the embedding-pipeline lines un-commented to build the
collection, then keep them commented out for normal serving.
"""

import streamlit as st
import pandas as pd
from torch import cuda

from appStore.prep_data import process_giz_worldwide
from appStore.prep_utils import create_documents, get_client
from appStore.embed import hybrid_embed_chunks
from appStore.search import hybrid_search

# Pick the compute device: GPU if available, otherwise CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

st.set_page_config(page_title="SEARCH IATI", layout='wide')
st.title("SEARCH IATI Database")
var = st.text_input("enter keyword")

#################### Create the embeddings collection and save ######################
# The steps below need to be performed only once and then commented out to avoid
# unnecessary compute over-run.
##### First we process and create the chunks for the relevant data source
# chunks = process_giz_worldwide()
##### Convert to langchain documents
# temp_doc = create_documents(chunks, 'chunks')
##### Embed and store docs; if the collection already exists you need to update it
collection_name = "giz_worldwide"
# hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name)

################### Hybrid Search ###################################################
client = get_client()
print(client.get_collections())  # debug: list collections present at startup

button = st.button("search")
# found_docs = vectorstore.similarity_search(var)
# print(found_docs)
# results = get_context(vectorstore, f"find the relvant paragraphs for: {var}")
if button:
    # hybrid_search returns two ranked result lists: results[0] are the
    # semantic (dense) hits, results[1] presumably the lexical/sparse hits
    # — TODO confirm against appStore.search.hybrid_search.
    results = hybrid_search(client, var, collection_name)
    st.write(f"Showing Top 10 results for query:{var}")
    st.write(f"Semantic: {len(results[0])}")
    st.write(results[0])
    # BUG FIX: this label previously read "Semantic" again, duplicating the
    # label of the first result set.
    st.write(f"Lexical: {len(results[1])}")
    st.write(results[1])
    # Earlier per-result rendering, kept for reference:
    # for i in results:
    #     st.subheader(str(i.metadata['id']) + ":" + str(i.metadata['title_main']))
    #     st.caption(f"Status:{str(i.metadata['status'])}, Country:{str(i.metadata['country_name'])}")
    #     st.write(i.page_content)
    #     st.divider()