Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -41,16 +41,27 @@ chain = chat_template | chat_model | output_parser
|
|
41 |
|
42 |
from langchain_community.document_loaders import PDFMinerLoader
|
43 |
from langchain_text_splitters import NLTKTextSplitter
|
|
|
|
|
44 |
|
45 |
uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
|
46 |
|
47 |
if uploaded_file is not None:
|
48 |
-
|
|
|
49 |
dat_nik = pdf_loader.load()
|
50 |
text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
|
51 |
chunks = text_splitter.split_documents(dat_nik)
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
# dat = PDFMinerLoader("2404.07143.pdf")
|
56 |
# dat_nik =dat.load()
|
@@ -62,23 +73,16 @@ else:
|
|
62 |
# chunks = text_splitter.split_documents(dat_nik)
|
63 |
# Creating Chunks Embedding
|
64 |
# We are just loading OpenAIEmbeddings
|
65 |
-
|
|
|
66 |
|
67 |
-
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
|
68 |
|
69 |
# vectors = embeddings.embed_documents(chunks)
|
70 |
# Store the chunks in vector store
|
71 |
-
|
72 |
|
73 |
# Creating a New Chroma Database
|
74 |
-
|
75 |
-
|
76 |
-
# saving the database on drive
|
77 |
-
db.persist()
|
78 |
-
# Setting a Connection with the ChromaDB
|
79 |
-
db_connection = Chroma(persist_directory="./chroma_db_", embedding_function=embedding_model)
|
80 |
-
# Converting CHROMA db_connection to Retriever Object, which retrieves top 5 results
|
81 |
-
retriever = db_connection.as_retriever(search_kwargs={"k": 5})
|
82 |
|
83 |
|
84 |
from langchain_core.runnables import RunnablePassthrough #takes user's question.
|
|
|
41 |
|
42 |
from langchain_community.document_loaders import PDFMinerLoader
|
43 |
from langchain_text_splitters import NLTKTextSplitter
|
44 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
45 |
+
from langchain_community.vectorstores import Chroma
|
46 |
|
47 |
uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
|
48 |
|
49 |
if uploaded_file is not None:
|
50 |
+
pdf_file = io.BytesIO(uploaded_file.read())
|
51 |
+
pdf_loader = PDFMinerLoader(pdf_file)
|
52 |
dat_nik = pdf_loader.load()
|
53 |
text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
|
54 |
chunks = text_splitter.split_documents(dat_nik)
|
55 |
+
|
56 |
+
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
|
57 |
+
|
58 |
+
db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
|
59 |
+
|
60 |
+
db.persist()
|
61 |
+
|
62 |
+
db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)
|
63 |
+
|
64 |
+
retriever = db_connection.as_retriever(search_kwargs={"k": 5})
|
65 |
|
66 |
# dat = PDFMinerLoader("2404.07143.pdf")
|
67 |
# dat_nik =dat.load()
|
|
|
73 |
# chunks = text_splitter.split_documents(dat_nik)
|
74 |
# Creating Chunks Embedding
|
75 |
# We are just loading OpenAIEmbeddings
|
76 |
+
|
77 |
+
|
78 |
|
|
|
79 |
|
80 |
# vectors = embeddings.embed_documents(chunks)
|
81 |
# Store the chunks in vector store
|
82 |
+
|
83 |
|
84 |
# Creating a New Chroma Database
|
85 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
|
88 |
from langchain_core.runnables import RunnablePassthrough #takes user's question.
|