Canstralian committed on
Commit
97579c6
·
verified ·
1 Parent(s): a6b52d0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from transformers import pipeline
5
+
6
# Initialize the Hugging Face model pipeline.
model_name = "your-huggingface-model-name"  # Replace with your model's name


@st.cache_resource
def _load_pipeline(name):
    """Build the text2text-generation pipeline for the model called *name*.

    Streamlit re-executes this script from the top on every user interaction;
    wrapping the construction in ``st.cache_resource`` lets reruns reuse the
    already-built pipeline instead of constructing it again each time.
    """
    return pipeline("text2text-generation", model=name)


nlp_pipeline = _load_pipeline(model_name)
9
+
10
def process_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The PDF source handed to ``PdfReader``; in this app it is the
            object returned by ``st.file_uploader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. A page that yields no text contributes an
        empty string.
    """
    reader = PdfReader(file)
    # `or ""` guards against a page whose extract_text() result is None
    # (or otherwise falsy), which would make string concatenation raise
    # TypeError. NOTE(review): whether None can actually be returned depends
    # on the installed PyPDF2 version -- confirm.
    return "".join(page.extract_text() or "" for page in reader.pages)
16
+
17
def convert_to_json(text):
    """Run *text* through the module-level ``nlp_pipeline`` and return its output.

    Args:
        text: Plain text to feed to the model (here, text extracted from a PDF).

    Returns:
        The ``generated_text`` field of the first pipeline result, as a string.
        NOTE(review): the caller parses this string as JSON, but nothing in
        this function guarantees the model output is valid JSON.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
    """
    # Fail early with a clear message instead of sending an empty prompt to
    # the model and getting meaningless output back.
    if not text or not text.strip():
        raise ValueError("No text to convert: the input is empty.")

    # Use the Hugging Face model to process the text. truncation=True asks the
    # tokenizer to cut inputs that exceed the model's maximum length rather
    # than passing an over-long sequence to the model; content beyond that
    # limit is therefore not seen by the model.
    result = nlp_pipeline(text, truncation=True)
    return result[0]['generated_text']
21
+
22
# Streamlit page: upload a PDF, extract its text, convert it with the model,
# then show the resulting JSON and offer it for download.
st.title("PDF to JSON Converter")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from the PDF
    pdf_text = process_pdf(uploaded_file)

    if not pdf_text.strip():
        # Nothing to convert (e.g. the PDF has no extractable text layer).
        st.warning("No extractable text was found in this PDF.")
    else:
        # Convert the extracted text to JSON using the Hugging Face model
        json_output = convert_to_json(pdf_text)

        try:
            # The model output is free-form text, so it may not parse as JSON.
            parsed_output = json.loads(json_output)
        except json.JSONDecodeError as exc:
            st.error(f"The model output is not valid JSON: {exc}")
            # Show the raw output so the user can see what the model produced.
            st.text(json_output)
        else:
            # Display the JSON output
            st.write("Converted JSON:")
            st.json(parsed_output)

            # Provide a download link for the JSON file. Only the trailing
            # extension is swapped, case-insensitively, so "Report.PDF"
            # becomes "Report.json" and a ".pdf" elsewhere in the name is
            # left untouched.
            original_name = uploaded_file.name
            if original_name.lower().endswith(".pdf"):
                stem = original_name[:-len(".pdf")]
            else:
                stem = original_name
            json_filename = stem + ".json"
            st.download_button(
                label="Download JSON",
                data=json_output,
                file_name=json_filename,
                mime="application/json",
            )