File size: 364 Bytes
d487adb
 
806d7c6
 
 
d487adb
806d7c6
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from typing import List

import pypdf


def read_pdf(filepath: str) -> List[str]:
    """Extract the text of every page of a PDF file.

    Args:
        filepath: Path of the PDF file; it is opened in binary mode.

    Returns:
        A list with one entry per page, in document order, each being the
        result of ``extract_text()`` for that page.
    """
    with open(filepath, 'rb') as stream:
        reader = pypdf.PdfReader(stream)
        # Extract while the file is still open: the reader is backed by
        # this stream.
        page_texts = [pdf_page.extract_text() for pdf_page in reader.pages]
    return page_texts


if __name__ == '__main__':
    # Manual smoke run: print the per-page text list of a sample PDF.
    page_texts = read_pdf('data/109-411-2-PB.pdf')
    print(page_texts)