Update extract_text_from_pdf.py
extract_text_from_pdf.py CHANGED (+4 -1)
@@ -2,6 +2,7 @@
 
 import os
 import torch
+import spaces
 from PyPDF2 import PdfReader
 from accelerate import Accelerator
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -9,6 +10,7 @@ from tqdm import tqdm
 import warnings
 
 
+
 warnings.filterwarnings('ignore')
 
 
@@ -16,7 +18,7 @@ class PDFTextExtractor:
     """
     A class to handle PDF text extraction and preprocessing for podcast preparation.
     """
-
+    @spaces.GPU
     def __init__(self, pdf_path, output_path, model_name="meta-llama/Llama-3.2-1B-Instruct"):
         """
         Initialize the PDFTextExtractor with paths and model details.
@@ -109,6 +111,7 @@ class PDFTextExtractor:
 
         return chunks
 
+    @spaces.GPU
     def process_chunk(self, text_chunk):
         """Process a text chunk with the model and return the cleaned text."""
         conversation = [
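
The commit imports the `spaces` package and adds the `@spaces.GPU` decorator to `__init__` and `process_chunk`, so a ZeroGPU device is requested only while those methods run. For context, here is a minimal sketch of how this ZeroGPU pattern is typically wired up. It assumes the Space runs on ZeroGPU hardware where the `spaces` package is available; the model name is taken from the diff above, while the `clean_chunk` function is illustrative and not part of extract_text_from_pdf.py.

# Minimal ZeroGPU sketch (assumptions noted above).
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
model.to("cuda")  # under ZeroGPU, actual device placement is deferred until a GPU is attached

@spaces.GPU  # a GPU is attached only for the duration of each call
def clean_chunk(text_chunk: str) -> str:
    """Illustrative helper: run the model once over a chunk of extracted text."""
    inputs = tokenizer(text_chunk, return_tensors="pt").to("cuda")
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=512)
    return tokenizer.decode(output[0], skip_special_tokens=True)

If a decorated call needs more than the default allocation window, the decorator also accepts a duration argument, e.g. `@spaces.GPU(duration=120)`.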