File size: 589 Bytes
4c7a0b4 b12ea1f 4c7a0b4 f05dba6 b12ea1f 4c7a0b4 f05dba6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
import gradio as gr
from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
def extract_text(pdf_file):
    """Return the first text chunk extracted from a PDF.

    Args:
        pdf_file: The PDF to read. Either an object exposing its on-disk
            path as ``.name`` (presumably what the ``gr.File`` input hands
            over -- confirm against the installed Gradio version) or a plain
            path string. When ``None``, the previously hard-coded
            ``"cereal.pdf"`` is loaded instead.

    Returns:
        The ``page_content`` of the first chunk produced by the splitter, or
        an empty string when the splitter yields no chunks.
    """
    if pdf_file is None:
        # Preserve the earlier behaviour for callers that pass nothing.
        pdf_path = "cereal.pdf"
    else:
        # File-like uploads carry their path in ``.name``; a plain string is
        # already the path.
        pdf_path = getattr(pdf_file, "name", pdf_file)

    documents = PDFMinerLoader(pdf_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)
    if not chunks:
        # Indexing an empty list here used to raise IndexError.
        return ""
    return chunks[0].page_content
# Build the upload widget on its own, then wire it to the extractor and
# start the app.
_pdf_upload = gr.File(type="file", label="Upload PDF")
iface = gr.Interface(fn=extract_text, inputs=_pdf_upload, outputs="text")
iface.launch()
|