# Page header captured with this source: "Spaces: Sleeping"
import os
import tempfile

import streamlit as st
# NOTE(review): confirm that `langchain.embeddings.Embedding` and
# `langchain_community.embeddings.groq.GroqEmbedding` exist in the installed
# LangChain release; these import paths could not be verified from this file.
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import Embedding
from langchain.vectorstores import FAISS
from langchain_community.embeddings.groq import GroqEmbedding
# Function to process PDF
def process_pdf(file):
    """Copy an uploaded PDF to a temporary file on disk and return its path.

    Args:
        file: Binary file-like object exposing ``read()``, such as the
            object returned by ``st.file_uploader``.

    Returns:
        str: Path of the newly created temporary file (suffix ``.pdf``).
        The file is created with ``delete=False``, so it persists after
        this function returns and the caller is responsible for removing it.
    """
    # Rewind seekable streams so the complete upload is written even when
    # the stream has already been (partially) read by earlier code.
    if getattr(file, "seekable", lambda: False)():
        file.seek(0)

    # delete=False keeps the file on disk after the handle closes, so a
    # path-based loader can reopen it by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
        tmpfile.write(file.read())
        tmpfile_path = tmpfile.name
    return tmpfile_path
# Main function to run the app
def main():
    """Run the Streamlit app: upload a PDF, embed its pages, and search them.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable.
    The uploaded PDF is written to a temporary file, loaded with
    ``PyPDFLoader``, indexed in an in-memory FAISS store, and the five most
    similar documents for the entered query are written to the page.

    NOTE: Streamlit re-runs the script on every widget interaction, so the
    PDF is re-loaded and re-embedded each time the query changes. Consider
    caching the vector store if this becomes slow or costly.
    """
    st.title("PDF Embedding and Query System")
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    # Credentials must never be committed to source control. The key that
    # was previously hard-coded here should be treated as compromised and
    # rotated.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        st.error("GROQ_API_KEY environment variable is not set.")
        return

    # Process the uploaded PDF file and load its content.
    tmp_file_path = process_pdf(uploaded_file)
    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # process_pdf creates the file with delete=False, so remove it here
        # once loading is done. Cleanup is best-effort: a failure to delete
        # a temp file must not mask a loader error or break the app.
        try:
            os.remove(tmp_file_path)
        except OSError:
            pass

    # Building an index from zero documents fails, so stop early with a
    # clear message instead of an opaque traceback.
    if not documents:
        st.warning("No readable pages were found in the uploaded PDF.")
        return

    # Use Groq embeddings and create a vector database.
    embeddings = GroqEmbedding(api_key=api_key)
    vector_db = FAISS.from_documents(documents, embeddings)

    # Perform the similarity search.
    query = st.text_input("Enter a query to search:")
    if query:
        results = vector_db.similarity_search(query, k=5)
        for result in results:
            # Search results are Document objects, which are not
            # subscriptable; the text lives on `page_content`.
            st.write(result.page_content)
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()