|
# Page header: project title, then attribution with links to the paper and
# the project page. The closing tags are written as real closers
# (</center>, </h1>) so the markup is well-formed.
gr.HTML(
    "<h1><center>Nougat: Neural Optical Understanding for Academic Documents</center></h1>"
)
gr.HTML(
    "<h3><center>Lukas Blecher et al. "
    "<a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, "
    "<a href='https://facebookresearch.github.io/nougat/'>Project</a></center></h3>"
)
|
|
|
# Caption row: three equally scaled headings ("upload", "OR", "link").
# The components are not bound to a name because nothing in the visible code
# reads them back; they only need to be created inside the row.
with gr.Row():
    gr.Markdown('<h4><center>Upload a PDF</center></h4>', scale=1)
    gr.Markdown('<h4><center><i>OR</i></center></h4>', scale=1)
    gr.Markdown('<h4><center>Provide a PDF link</center></h4>', scale=1)
|
|
|
# Input row: a single-file upload next to a textbox for a PDF link.
with gr.Row(equal_height=True):
    pdf_file = gr.File(
        label="PDF📃",
        file_count="single",
        scale=1,
    )
    pdf_link = gr.Textbox(
        placeholder="Enter an Arxiv link here",
        label="PDF link🔗🌐",
        scale=1,
    )

# Action row: run the conversion, or clear the form.
with gr.Row():
    btn = gr.Button("Run NOUGAT🍫")
    clr = gr.Button("Clear🚿")

# Output area: a headline, the rendered OCR result, and a read-only file
# component for downloading the generated .mmd file.
output_headline = gr.Markdown(
    "<h3>PDF converted to markup language through Nougat-OCR👇:</h3>"
)
parsed_output = gr.Markdown(value="📃🔤OCR Output", elem_id="mkd")
mmd_file_download = gr.File(label="Download .mmd file", interactive=False)
|
|
|
def handle_predict(pdf_file, pdf_link):
    """Call ``predict`` and adapt its result for the two output components.

    Args:
        pdf_file: Value forwarded unchanged as the first argument of
            ``predict``.
        pdf_link: Value forwarded unchanged as the second argument of
            ``predict``.

    Returns:
        A 2-tuple: a ``gr.update`` whose ``value`` is the first element
        returned by ``predict``, followed by the second element as-is
        (presumably the path of the generated .mmd file; confirm against
        ``predict``).
    """
    markdown_text, mmd_path = predict(pdf_file, pdf_link)
    markdown_update = gr.update(value=markdown_text)
    return markdown_update, mmd_path
|
|
|
# Run button: feed both inputs to handle_predict and route its two return
# values to the rendered output and the download component.
btn.click(
    handle_predict,
    inputs=[pdf_file, pdf_link],
    outputs=[parsed_output, mmd_file_download],
)

# Clear button: reset every input and output to None. An event handler must
# return one value per listed output component, so the updates are generated
# from the same list that is passed as `outputs` to keep the counts in sync.
cleared_components = [pdf_file, pdf_link, parsed_output, mmd_file_download]
clr.click(
    lambda: tuple(gr.update(value=None) for _ in cleared_components),
    inputs=[],
    outputs=cleared_components,
)
|
|
|
# Clickable examples. Each row supplies [pdf_file, pdf_link]: the first uses
# a file path with an empty link, the second uses only a link.
# `process_example` is defined elsewhere in this file.
example_rows = [
    ["input/nougat.pdf", ""],
    [None, "https://arxiv.org/pdf/2308.08316.pdf"],
]
gr.Examples(
    examples=example_rows,
    inputs=[pdf_file, pdf_link],
    outputs=parsed_output,
    fn=process_example,
    cache_examples=True,
    label='Click on any Examples below to get Nougat OCR results quickly:',
)
|
|
|
# Enable the request queue, then start the app with debug mode on.
# queue() returns the Blocks instance, so the two calls can be chained.
demo.queue().launch(debug=True)
|
|