Spaces:

Chemically-motivated
/

pdf_to_json_converter

Running

Canstralian committed on Dec 31, 2024

Commit

97579c6

verified ·

1 Parent(s): a6b52d0

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import json
+import streamlit as st
+from PyPDF2 import PdfReader
+from transformers import pipeline
+# Initialize the Hugging Face model pipeline
+model_name = "your-huggingface-model-name"  # Replace with your model's name
+nlp_pipeline = pipeline("text2text-generation", model=model_name)
+def process_pdf(file):
+    reader = PdfReader(file)
+    text = ""
+    for page in reader.pages:
+        text += page.extract_text()
+    return text
+def convert_to_json(text):
+    # Use the Hugging Face model to process the text
+    result = nlp_pipeline(text)
+    return result[0]['generated_text']
+st.title("PDF to JSON Converter")
+uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+if uploaded_file is not None:
+    st.write("Processing your file...")
+    # Extract text from the PDF
+    pdf_text = process_pdf(uploaded_file)
+    # Convert the extracted text to JSON using the Hugging Face model
+    json_output = convert_to_json(pdf_text)
+    # Display the JSON output
+    st.write("Converted JSON:")
+    st.json(json.loads(json_output))
+    # Provide a download link for the JSON file
+    json_filename = uploaded_file.name.replace(".pdf", ".json")
+    st.download_button(
+        label="Download JSON",
+        data=json_output,
+        file_name=json_filename,
+        mime="application/json"
+    )