chatPDF / app.py
dammy's picture
Update app.py
b4c7950
raw
history blame
859 Bytes
import gradio as gr
from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
def extract_text(pdf_file):
    """Return the first text chunk extracted from a PDF.

    The document is loaded with ``PDFMinerLoader`` and split with a
    ``CharacterTextSplitter`` (``chunk_size=1000``, ``chunk_overlap=0``).
    Only the first resulting chunk is returned.

    Args:
        pdf_file: Path of the PDF to read. A falsy value (``None`` or an
            empty string, e.g. when nothing was uploaded) is accepted.

    Returns:
        The text of the first chunk, or an empty string when no file was
        given or splitting produced no chunks.
    """
    # Nothing was supplied: skip loading instead of handing a non-path
    # to the loader.
    if not pdf_file:
        return ""

    # Load a document
    documents = PDFMinerLoader(pdf_file).load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = text_splitter.split_documents(documents)

    # The splitter can return an empty list (presumably when no text could
    # be extracted); indexing [0] would then raise IndexError.
    if not chunks:
        return ""
    return chunks[0].page_content
# def upload_file(file):
# return file.name
# with gr.Blocks() as demo:
# file_output = gr.File()
# upload_button = gr.UploadButton("Click to Upload a File", file_types="file")
# upload_button.upload(upload_file, upload_button, file_output)
# Gradio UI: one PDF upload component whose result is shown as plain text.
# type="filepath" is the upload mode requested for the File component.
pdf_upload = gr.File(type="filepath", label="Upload PDF")
iface = gr.Interface(fn=extract_text, inputs=pdf_upload, outputs="text")

# Launch the interface.
iface.launch()