# Spaces: Runtime error
import gradio as gr
from PyPDF4 import PdfFileReader
import tiktoken
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        One string holding the extracted text of all pages; no separator
        is inserted between pages.
    """
    # PdfFileReader is handed the open file object, so every page is
    # extracted while the handle is still open.
    with open(file_path, "rb") as file:
        pdf = PdfFileReader(file)
        # Join once instead of growing a string with += for every page.
        return "".join(
            pdf.getPage(page_num).extractText()
            for page_num in range(pdf.getNumPages())
        )
def count_tokens(text):
    """Return how many tokens *text* occupies for the gpt-3.5-turbo model.

    Args:
        text: The string to tokenize.

    Returns:
        The number of tokens produced by the model's tiktoken encoding.
    """
    tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
    # An empty ``disallowed_special`` lets text that happens to look like a
    # special token be encoded as ordinary text instead of raising.
    tokens = tokenizer.encode(text, disallowed_special=())
    return len(tokens)
def count_tokens_in_file(file):
    """Count the tokens in the text of an uploaded PDF.

    Args:
        file: The value delivered by the ``gr.File`` input: an object that
            exposes the file path as ``.name``, a plain path string, or
            ``None`` when nothing has been uploaded yet.

    Returns:
        The token count of the PDF's extracted text, or ``0`` when no file
        was provided.
    """
    # The "Count token" button can be clicked before any upload, in which
    # case the input value is None; report zero instead of crashing.
    if file is None:
        return 0

    # Accept both a file-like wrapper (path in ``.name``) and a bare path.
    pdf_path = getattr(file, "name", file)

    # Extract text from the PDF file
    paper_text = extract_text_from_pdf(pdf_path)
    return count_tokens(paper_text)
# Build the UI: a single tab with a PDF upload, a read-only result box and a
# button. Both the upload event and the button click run the same counter.
with gr.Blocks() as demo:
    gr.Markdown("Upload your document to count their tokens")
    with gr.Tab("Upload PDF & TXT"):
        # Only .pdf is accepted here; the extraction helper reads PDFs.
        docs_input = gr.File(file_count="single", file_types=[".pdf"])
        tb_tokenCount = gr.Textbox(label="Number of tokens")
        # Count automatically as soon as a file finishes uploading ...
        docs_input.upload(
            count_tokens_in_file,
            inputs=[docs_input],
            outputs=[tb_tokenCount],
        )
        # ... and again on demand when the button is pressed.
        btn_count = gr.Button("Count token")
        btn_count.click(
            count_tokens_in_file,
            inputs=[docs_input],
            outputs=[tb_tokenCount],
        )

# Request queueing is intentionally left disabled.
#demo.queue()
demo.launch(debug=True, share=False)