File size: 620 Bytes
5306da4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
from io import BytesIO
from docx import Document as DocxDocument
from pptx import Presentation
def extract_text_from_docx(content_bytes: bytes) -> str:
    """Return the text of a .docx document held in memory.

    The bytes are wrapped in a ``BytesIO`` and opened with ``DocxDocument``.
    Text is read from ``document.paragraphs`` only; paragraphs whose text is
    empty or whitespace-only are dropped.

    Args:
        content_bytes: Raw contents of the .docx file.

    Returns:
        The kept paragraph texts joined with ``"\\n"``; an empty string when
        no paragraph has visible text.
    """
    document = DocxDocument(BytesIO(content_bytes))

    kept_lines = []
    for paragraph in document.paragraphs:
        paragraph_text = paragraph.text
        # Filter on the stripped text, but keep the paragraph text unmodified.
        if paragraph_text.strip():
            kept_lines.append(paragraph_text)

    return "\n".join(kept_lines)
def extract_text_from_pptx(content_bytes: bytes) -> str:
    """Return the text of a .pptx presentation held in memory.

    The bytes are wrapped in a ``BytesIO`` and opened with ``Presentation``.
    Slides are visited in order, and within each slide every shape in
    ``slide.shapes`` is inspected. A shape contributes its ``text`` only when
    it exposes a ``text`` attribute and that text is not blank.

    Args:
        content_bytes: Raw contents of the .pptx file.

    Returns:
        The collected shape texts joined with ``"\\n"``; an empty string when
        no shape has visible text.
    """
    presentation = Presentation(BytesIO(content_bytes))
    return "\n".join(
        shape.text
        for slide in presentation.slides
        for shape in slide.shapes
        # Shapes without a `text` attribute are skipped.
        if hasattr(shape, "text") and shape.text.strip()
    )
|