File size: 859 Bytes
4c7a0b4
b12ea1f
 
 
4c7a0b4
f05dba6
b4c7950
b12ea1f
b4c7950
b12ea1f
 
 
 
 
 
 
 
4c7a0b4
b4c7950
 
 
 
 
 
 
 
 
 
f05dba6
 
b4c7950
f05dba6
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter


def extract_text(pdf_file):
    """Return the first text chunk extracted from a PDF file.

    The PDF is loaded with ``PDFMinerLoader``, the resulting documents are
    split with ``CharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=0``), and only the first chunk's text is returned.

    Args:
        pdf_file: Path of the PDF, passed as-is to ``PDFMinerLoader``.
            May be ``None`` or empty, e.g. when nothing was uploaded.

    Returns:
        The ``page_content`` of the first chunk, or ``""`` when no file
        was given or splitting produced no chunks.
    """
    # Nothing to load: do not hand a missing path to the loader.
    if not pdf_file:
        return ""

    documents = PDFMinerLoader(pdf_file).load()

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    # Splitting may produce no chunks (e.g. a PDF without extractable
    # text); guard so that indexing [0] cannot raise IndexError.
    if not chunks:
        return ""

    return chunks[0].page_content


# def upload_file(file):
#     return file.name

# with gr.Blocks() as demo:
#     file_output = gr.File()
#     upload_button = gr.UploadButton("Click to Upload a File", file_types="file")
#     upload_button.upload(upload_file, upload_button, file_output)
    

# Upload widget for the PDF; type="filepath" is presumably what makes Gradio
# pass the callback a path rather than a file object (confirm in Gradio docs).
pdf_upload = gr.File(type="filepath", label="Upload PDF")

# Wire the upload to extract_text and show its return value as plain text.
iface = gr.Interface(fn=extract_text, inputs=pdf_upload, outputs="text")

iface.launch()