Canstralian's picture
Create app.py
97579c6 verified
raw
history blame
1.32 kB
import json
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
# Identifier of the Hugging Face checkpoint to load.
model_name = "your-huggingface-model-name"  # Replace with your model's name
# Build the text2text-generation pipeline at module level; convert_to_json
# reads it through the global name ``nlp_pipeline``.
nlp_pipeline = pipeline(task="text2text-generation", model=model_name)
def process_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The PDF source handed straight to ``PdfReader``; this script
            passes the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. Pages that yield no text contribute an empty
        string.
    """
    reader = PdfReader(file)
    # Defensive ``or ""``: if a page (e.g. one without a text layer) ever
    # yields None instead of a string, the join must not fail with TypeError.
    # Joining once also avoids repeated string re-allocation on long PDFs.
    return "".join(page.extract_text() or "" for page in reader.pages)
def convert_to_json(text):
    """Run the module-level text2text pipeline on ``text`` and return its output.

    Args:
        text: Input string passed to ``nlp_pipeline`` unchanged.

    Returns:
        The ``generated_text`` entry of the first item the pipeline returns.
        This function does not check that the string is well-formed JSON.
    """
    generations = nlp_pipeline(text)
    first_generation = generations[0]
    return first_generation["generated_text"]
# --- Streamlit UI: upload a PDF, convert its text with the model, show/download JSON ---
st.title("PDF to JSON Converter")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from the PDF
    pdf_text = process_pdf(uploaded_file)

    if not pdf_text.strip():
        # Nothing to convert (e.g. a PDF with no text layer); skip the model
        # call instead of feeding it an empty prompt.
        st.warning("No extractable text was found in this PDF.")
    else:
        # Convert the extracted text to JSON using the Hugging Face model
        json_output = convert_to_json(pdf_text)

        # The model returns free-form text, so it may not be valid JSON.
        # Report that to the user rather than crashing with a traceback.
        try:
            parsed_output = json.loads(json_output)
        except json.JSONDecodeError as err:
            st.error(f"The model output is not valid JSON: {err}")
            st.text(json_output)
        else:
            # Display the JSON output
            st.write("Converted JSON:")
            st.json(parsed_output)

            # Swap only the final extension, whatever its letter case, so
            # "Report.PDF" becomes "Report.json" and a ".pdf" elsewhere in the
            # name is left alone.
            json_filename = uploaded_file.name.rsplit(".", 1)[0] + ".json"

            # Provide a download link for the JSON file
            st.download_button(
                label="Download JSON",
                data=json_output,
                file_name=json_filename,
                mime="application/json",
            )