DrishtiSharma committed on
Commit
b36f0bb
·
verified ·
1 Parent(s): 23248f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -55
app.py CHANGED
@@ -43,64 +43,9 @@ st.title("Blah-2")
43
  # Step 1: Choose PDF Source
44
  pdf_source = st.radio("Upload or provide a link to a PDF:", ["Enter a PDF URL", "Upload a PDF file"], index=0, horizontal=True)
45
 
46
- if pdf_source == "Upload a PDF file":
47
- uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
48
- if uploaded_file:
49
- st.session_state.pdf_path = "temp.pdf"
50
- with open(st.session_state.pdf_path, "wb") as f:
51
- f.write(uploaded_file.getbuffer())
52
- st.session_state.pdf_loaded = False
53
- st.session_state.chunked = False
54
- st.session_state.vector_created = False
55
-
56
- elif pdf_source == "Enter a PDF URL":
57
- pdf_url = st.text_input("Enter PDF URL:", key="pdf_url", on_change=lambda: st.session_state.update({"process_pdf": True}))
58
-
59
- if st.session_state.get("process_pdf") and pdf_url: # ✅ Triggered only when Enter is pressed
60
- with st.spinner("Downloading PDF..."):
61
- try:
62
- # Download PDF
63
- response = requests.get(pdf_url)
64
- if response.status_code == 200:
65
- st.session_state.pdf_path = "temp.pdf"
66
- with open(st.session_state.pdf_path, "wb") as f:
67
- f.write(response.content)
68
- st.success("✅ PDF Downloaded Successfully!")
69
- else:
70
- st.error("❌ Failed to download PDF. Check the URL.")
71
- st.stop()
72
-
73
- # Step 2: Load PDF
74
- st.spinner("Loading PDF...")
75
- loader = PDFPlumberLoader(st.session_state.pdf_path)
76
- docs = loader.load()
77
- st.session_state.documents = docs
78
- st.session_state.pdf_loaded = True
79
- st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
80
 
81
- # Step 3: Chunking the document
82
- st.spinner("Chunking the document...")
83
- model_name = "nomic-ai/modernbert-embed-base"
84
- embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
85
- text_splitter = SemanticChunker(embedding_model)
86
 
87
- if st.session_state.documents:
88
- documents = text_splitter.split_documents(st.session_state.documents)
89
- st.session_state.documents = documents
90
- st.session_state.chunked = True
91
-
92
- # Save chunks for persistence
93
- CHUNKS_FILE = "/tmp/chunks.pkl"
94
- with open(CHUNKS_FILE, "wb") as f:
95
- pickle.dump(documents, f)
96
-
97
- st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
98
 
99
- # Reset trigger to prevent looping
100
- st.session_state.process_pdf = False
101
-
102
- except Exception as e:
103
- st.error(f"❌ Error: {e}")
104
 
105
 
106
  # Step 2: Load & Process PDF (Only Once)
 
43
  # Step 1: Choose PDF Source
44
  pdf_source = st.radio("Upload or provide a link to a PDF:", ["Enter a PDF URL", "Upload a PDF file"], index=0, horizontal=True)
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
 
 
 
 
 
47
 
 
 
 
 
 
 
 
 
 
 
 
48
 
 
 
 
 
 
49
 
50
 
51
  # Step 2: Load & Process PDF (Only Once)