File size: 859 Bytes
4c7a0b4 b12ea1f 4c7a0b4 f05dba6 b4c7950 b12ea1f b4c7950 b12ea1f 4c7a0b4 b4c7950 f05dba6 b4c7950 f05dba6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import gradio as gr
from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
def extract_text(pdf_file):
    """Return the first text chunk extracted from a PDF file.

    The PDF is loaded with ``PDFMinerLoader`` and split with
    ``CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)``; only the
    first resulting chunk is returned.

    Args:
        pdf_file: Path to the PDF to read. May be ``None`` or empty when
            nothing was provided.

    Returns:
        The text of the first chunk, or an empty string when no file was
        given or the document produced no text chunks.
    """
    # Nothing to read: return early instead of handing a missing path to
    # the loader.
    if not pdf_file:
        return ""

    # Load a document
    loader = PDFMinerLoader(pdf_file)
    doc = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = text_splitter.split_documents(doc)

    # A document without extractable text yields no chunks; indexing [0]
    # unconditionally would raise IndexError in that case.
    if not chunks:
        return ""
    return chunks[0].page_content
# Alternative UI using an upload button (currently disabled):
# def upload_file(file):
#     return file.name
#
# with gr.Blocks() as demo:
#     file_output = gr.File()
#     upload_button = gr.UploadButton("Click to Upload a File", file_types="file")
#     upload_button.upload(upload_file, upload_button, file_output)
# Build the web UI: one PDF upload input, with the function's result shown
# as plain text.
pdf_input = gr.File(type="filepath", label="Upload PDF")
iface = gr.Interface(fn=extract_text, inputs=pdf_input, outputs="text")

# Start the Gradio server.
iface.launch()
|