# NOTE(review): Hugging Face Spaces page residue ("Spaces: Sleeping") was
# captured at the top of this file; it is not part of the script.
import streamlit as st
import pandas as pd
from appStore.prep_data import process_giz_worldwide
from appStore.prep_utils import create_documents, get_client
from appStore.embed import hybrid_embed_chunks
from torch import cuda

# Select the device to be used: GPU if available, otherwise CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

# Basic Streamlit page chrome and the keyword input for the search UI.
st.set_page_config(page_title="SEARCH IATI", layout='wide')
st.title("SEARCH IATI Database")
var = st.text_input("enter keyword")
#################### Create the embeddings collection and save ######################
# The steps below need to be performed only once; keep them commented out
# afterwards to avoid unnecessary compute on every rerun.
##### First we process and create the chunks for the relevant data source
#chunks = process_giz_worldwide()
##### Convert to langchain documents
#temp_doc = create_documents(chunks,'chunks')
##### Embed and store docs; if the collection already exists you need to update it
#collection_name = "giz_worldwide"
#hybrid_embed_chunks(docs= temp_doc, collection_name = collection_name)
################### Hybrid Search ######################################################
client = get_client()
print(client.get_collections())

button = st.button("search")

#found_docs = vectorstore.similarity_search(var)
#print(found_docs)
# results= get_context(vectorstore, f"find the relevant paragraphs for: {var}")

if button:
    # BUG FIX: `results` was referenced here without ever being assigned
    # (the retrieval call above is commented out), so pressing the button
    # raised a NameError. Default to an empty list until retrieval is wired
    # up against the "giz_worldwide" collection.
    results = []  # TODO: populate via hybrid search on `client` for query `var`
    st.write(f"Found {len(results)} results for query:{var}")
    for i in results:
        # Each result is expected to be a langchain-style Document with
        # `metadata` and `page_content` — confirm once retrieval is enabled.
        st.subheader(str(i.metadata['id']) + ":" + str(i.metadata['title_main']))
        st.caption(f"Status:{str(i.metadata['status'])}, Country:{str(i.metadata['country_name'])}")
        st.write(i.page_content)
        st.divider()