DrishtiSharma committed on
Commit
c98699f
·
verified ·
1 Parent(s): aeca549

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -117,7 +117,7 @@ if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
117
  st.json(docs[0].metadata)
118
 
119
  # Extract metadata
120
- title, author, email_str, affiliation_str = extract_metadata(st.session_state.pdf_path)
121
 
122
  # Display extracted metadata
123
  st.subheader("📄 Extracted Document Metadata")
@@ -131,8 +131,8 @@ if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
131
  embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
132
 
133
  # Convert metadata into a retrievable chunk
134
- metadata_text = f"Title: {title}\nAuthor: {author}\nEmails: {email_str}\nAffiliations: {affiliation_str}"
135
- metadata_doc = {"page_content": metadata_text, "metadata": {"source": "metadata"}}
136
 
137
  # Prevent unnecessary re-chunking
138
  if not st.session_state.chunked:
 
117
  st.json(docs[0].metadata)
118
 
119
  # Extract metadata
120
+ metadata = extract_metadata_llm(st.session_state.pdf_path)
121
 
122
  # Display extracted metadata
123
  st.subheader("📄 Extracted Document Metadata")
 
131
  embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
132
 
133
  # Convert metadata into a retrievable chunk
134
+ metadata_doc = {"page_content": metadata, "metadata": {"source": "metadata"}}
135
+
136
 
137
  # Prevent unnecessary re-chunking
138
  if not st.session_state.chunked: