# Spaces: Runtime error
from PyPDF2 import PdfFileReader | |
from tiktoken import Tokenizer | |
from tiktoken.models import GPT2 | |
import gradio as gr | |
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_path: Path of the PDF document to read.

    Returns:
        The text extracted from all pages, joined in page order with no
        separator. A page that yields no text contributes an empty string.
    """
    # PdfFileReader / getNumPages / getPage / extractText are the legacy
    # camelCase API, which PyPDF2 3.x rejects with DeprecationError;
    # PdfReader, .pages and .extract_text() are their replacements.
    from PyPDF2 import PdfReader

    with open(file_path, "rb") as file:
        reader = PdfReader(file)
        # Extract while the file is still open, and join once instead of
        # growing a string with += page by page.
        return "".join(page.extract_text() or "" for page in reader.pages)
def count_tokens(text):
    """Return the number of GPT-2 BPE tokens in ``text``.

    Args:
        text: The string to tokenize. ``None`` or an empty string counts as
            zero tokens.

    Returns:
        The token count as an ``int``.
    """
    if not text:
        # Nothing to tokenize; skip loading the encoding altogether.
        return 0

    # tiktoken exposes its encodings through get_encoding(); it has no
    # ``Tokenizer`` class or ``tiktoken.models`` module, so the previous
    # ``Tokenizer(GPT2()).tokenize(text)`` call could never run.
    import tiktoken

    encoding = tiktoken.get_encoding("gpt2")
    # disallowed_special=() makes strings such as "<|endoftext|>" that happen
    # to appear in a document count as ordinary text instead of raising.
    return len(encoding.encode(text, disallowed_special=()))
def count_tokens_in_file(file):
    """Count the tokens in an uploaded PDF.

    Args:
        file: The uploaded document. Either a filesystem path (``str`` or
            ``os.PathLike``), an object whose ``name`` attribute holds the
            path, or ``None`` when nothing was uploaded.

    Returns:
        The number of tokens in the text extracted from the PDF, or ``0``
        when ``file`` is ``None``.
    """
    import os

    if file is None:
        # No upload yet: report zero rather than failing on ``None.name``.
        return 0

    # Accept a plain path as well as a file wrapper exposing ``.name``.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    # Extract text from the PDF file, then tokenize it.
    paper_text = extract_text_from_pdf(pdf_path)
    return count_tokens(paper_text)
# Assemble the web UI: a single tab holding a file-upload field, a textbox
# for the result and a button that triggers the token count.
with gr.Blocks() as demo:
    gr.Markdown("Upload your document to count their tokens")

    with gr.Tab("Upload PDF & TXT"):
        # The upload widget takes one file and is restricted to ".pdf".
        docs_input = gr.File(file_count="single", file_types=[".pdf"])
        tb_tokenCount = gr.Textbox(label='Number of tokens')
        btn_count = gr.Button("Count token")

        # Clicking the button passes the upload to count_tokens_in_file and
        # writes the returned count into the textbox.
        btn_count.click(
            fn=count_tokens_in_file,
            inputs=[docs_input],
            outputs=[tb_tokenCount],
        )