from .IDataReader import IDataReader
from PyPDF2 import PdfReader
from docx import Document


class DataReader(IDataReader):
    """
    Extracts plain text from PDF, DOCX and TXT files.

    Every reader is best-effort: any exception raised while reading is
    caught, reported with ``print`` and an empty string is returned instead
    of propagating the error to the caller.
    """

    def read_pdf(self, file_path: str) -> str:
        """
        Reads a PDF file and returns its text content.

        The text of each page is followed by a newline. Pages for which
        ``extract_text()`` returns nothing are skipped.

        :param file_path: Path to the PDF file.
        :return: Text content of the PDF file, or "" if reading fails.
        """
        try:
            # Collect the pieces and join once instead of growing a string
            # with += on every page.
            parts = []
            with open(file_path, "rb") as f:
                reader = PdfReader(f)
                for page in reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        parts.append(page_text)
                        parts.append("\n")
            return "".join(parts)
        except Exception as e:
            # Deliberate best-effort: report the problem and fall back to "".
            print(f"Error reading PDF file: {e}")
            return ""

    def read_docx(self, file_path: str) -> str:
        """
        Reads a DOCX file and returns its text content.

        Only the paragraphs exposed by ``Document.paragraphs`` are included;
        their texts are joined with newlines.

        :param file_path: Path to the DOCX file.
        :return: Text content of the DOCX file, or "" if reading fails.
        """
        try:
            doc = Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            # Deliberate best-effort: report the problem and fall back to "".
            print(f"Error reading DOCX file: {e}")
            return ""

    def read_txt(self, file_path: str) -> str:
        """
        Reads a TXT file and returns its text content.

        The file is decoded as UTF-8. A leading byte-order mark, if present,
        is not included in the returned text.

        :param file_path: Path to the TXT file.
        :return: Text content of the TXT file, or "" if reading fails.
        """
        try:
            # "utf-8-sig" decodes plain UTF-8 identically but drops a leading
            # BOM, so BOM-prefixed files do not yield a stray U+FEFF.
            with open(file_path, "r", encoding="utf-8-sig") as f:
                return f.read()
        except Exception as e:
            # Deliberate best-effort: report the problem and fall back to "".
            print(f"Error reading TXT file: {e}")
            return ""