yashasgupta committed on
Commit
281101c
·
verified ·
1 Parent(s): 4a2a968

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -14
app.py CHANGED
@@ -41,16 +41,27 @@ chain = chat_template | chat_model | output_parser
41
 
42
  from langchain_community.document_loaders import PDFMinerLoader
43
  from langchain_text_splitters import NLTKTextSplitter
 
 
44
 
45
  uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
46
 
47
  if uploaded_file is not None:
48
- pdf_loader = PDFMinerLoader(uploaded_file)
 
49
  dat_nik = pdf_loader.load()
50
  text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
51
  chunks = test_splitter.split_documents(dat_nik)
52
- else:
53
- st.write("Please upload a pdf")
 
 
 
 
 
 
 
 
54
 
55
  # dat = PDFMinerLoader("2404.07143.pdf")
56
  # dat_nik =dat.load()
@@ -62,23 +73,16 @@ else:
62
  # chunks = text_splitter.split_documents(dat_nik)
63
  # Creating Chunks Embedding
64
  # We are just loading OpenAIEmbeddings
65
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
66
 
67
- embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
68
 
69
  # vectors = embeddings.embed_documents(chunks)
70
  # Store the chunks in vector store
71
- from langchain_community.vectorstores import Chroma
72
 
73
  # Creating a New Chroma Database
74
- db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
75
-
76
- # saving the database on drive
77
- db.persist()
78
- # Setting a Connection with the ChromaDB
79
- db_connection = Chroma(persist_directory="./chroma_db_", embedding_function=embedding_model)
80
- # Converting CHROMA db_connection to Retriever Object, which retrieves top 5 results
81
- retriever = db_connection.as_retriever(search_kwargs={"k": 5})
82
 
83
 
84
  from langchain_core.runnables import RunnablePassthrough #takes user's question.
 
41
 
42
  from langchain_community.document_loaders import PDFMinerLoader
43
  from langchain_text_splitters import NLTKTextSplitter
44
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
45
+ from langchain_community.vectorstores import Chroma
46
 
47
  uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
48
 
49
  if uploaded_file is not None:
50
+ pdf_file = io.BytesIO(uploaded_file.read())
51
+ pdf_loader = PDFMinerLoader(pdf_file)
52
  dat_nik = pdf_loader.load()
53
  text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
54
  chunks = test_splitter.split_documents(dat_nik)
55
+
56
+ embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
57
+
58
+ db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
59
+
60
+ db.persist()
61
+
62
+ db_connection = Chroma(persist_directory="./chroma_db_", embedding_function=embedding_model)
63
+
64
+ retriever = db_connection.as_retriever(search_kwargs={"k": 5})
65
 
66
  # dat = PDFMinerLoader("2404.07143.pdf")
67
  # dat_nik =dat.load()
 
73
  # chunks = text_splitter.split_documents(dat_nik)
74
  # Creating Chunks Embedding
75
  # We are just loading OpenAIEmbeddings
76
+
77
+
78
 
 
79
 
80
  # vectors = embeddings.embed_documents(chunks)
81
  # Store the chunks in vector store
82
+
83
 
84
  # Creating a New Chroma Database
85
+
 
 
 
 
 
 
 
86
 
87
 
88
  from langchain_core.runnables import RunnablePassthrough #takes user's question.