Mattral committed on
Commit
8ef7048
·
verified ·
1 Parent(s): 3a411d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -64,13 +64,15 @@ def get_url_content(url):
64
  @st.cache_resource
65
  def get_retriever(urls):
66
  all_content = [get_url_content(url) for url in urls]
 
67
  documents = [Document(page_content=doc, metadata={'url': url}) for (url, doc) in all_content]
 
68
 
69
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
70
  docs = text_splitter.split_documents(documents)
 
71
 
72
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
73
-
74
  db = DocArrayInMemorySearch.from_documents(docs, embeddings)
75
  retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
76
  return retriever
 
64
  @st.cache_resource
65
  def get_retriever(urls):
66
  all_content = [get_url_content(url) for url in urls]
67
+ print(all_content) # See what is actually fetched
68
  documents = [Document(page_content=doc, metadata={'url': url}) for (url, doc) in all_content]
69
+ print(documents) # Verify that documents are created correctly
70
 
71
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
72
  docs = text_splitter.split_documents(documents)
73
+ print(docs) # Check the final structure of split documents
74
 
75
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
76
  db = DocArrayInMemorySearch.from_documents(docs, embeddings)
77
  retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
78
  return retriever