File size: 589 Bytes
4c7a0b4
b12ea1f
 
 
4c7a0b4
f05dba6
b12ea1f
 
 
 
 
 
 
 
 
 
4c7a0b4
f05dba6
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import gradio as gr
from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter


def extract_text(pdf_file):
    """Return the first text chunk extracted from an uploaded PDF.

    Args:
        pdf_file: The upload handed over by the Gradio ``File`` input. Either
            a filesystem path string or an object exposing the path as
            ``.name`` (e.g. a temporary-file wrapper). ``None`` means nothing
            was uploaded.

    Returns:
        The text of the first chunk (chunks are produced with
        ``chunk_size=1000`` and no overlap), or an empty string when there is
        no upload or no chunk was produced.
    """
    if pdf_file is None:
        return ""

    # Read the file the user actually uploaded instead of a fixed path on disk.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    documents = PDFMinerLoader(pdf_path).load()

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    # Guard against an empty chunk list so indexing cannot raise IndexError.
    return chunks[0].page_content if chunks else ""

# Single file-upload input; the selected file is passed to extract_text.
_pdf_upload = gr.File(type="file", label="Upload PDF")

# Wire the upload widget to extract_text and show its result as plain text.
iface = gr.Interface(fn=extract_text, inputs=_pdf_upload, outputs="text")

# Start the web UI.
iface.launch()