File size: 620 Bytes
5306da4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from io import BytesIO
from docx import Document as DocxDocument
from pptx import Presentation



def extract_text_from_docx(content_bytes: bytes) -> str:
    """Return the text of a .docx payload, one paragraph per line.

    The raw bytes are wrapped in an in-memory stream and opened with
    ``DocxDocument``. Every entry of the document's ``paragraphs`` whose
    text is not empty or whitespace-only is kept, unmodified, and the kept
    texts are joined with ``"\\n"``.

    Args:
        content_bytes: Raw bytes of the .docx file.

    Returns:
        The newline-joined paragraph texts; an empty string when no
        paragraph carries visible text.
    """
    # NOTE(review): only `paragraphs` is read here; text held elsewhere
    # (e.g. tables) is presumably not included - confirm against python-docx.
    stream = BytesIO(content_bytes)
    document = DocxDocument(stream)

    kept = []
    for para in document.paragraphs:
        body = para.text
        # Whitespace-only paragraphs are skipped; kept text is not trimmed.
        if body.strip():
            kept.append(body)
    return "\n".join(kept)



def extract_text_from_pptx(content_bytes: bytes) -> str:
    """Return the text of a .pptx payload, one shape per line.

    The raw bytes are wrapped in an in-memory stream and opened with
    ``Presentation``. Slides are visited in order, and within each slide
    every shape exposing a ``text`` attribute with non-whitespace content
    contributes that text, unmodified. The collected texts are joined
    with ``"\\n"``.

    Args:
        content_bytes: Raw bytes of the .pptx file.

    Returns:
        The newline-joined shape texts; an empty string when no shape
        carries visible text.
    """
    deck = Presentation(BytesIO(content_bytes))

    collected = []
    for slide in deck.slides:
        for shape in slide.shapes:
            # Not every shape object offers a `text` attribute; skip those.
            if not hasattr(shape, "text"):
                continue
            shape_text = shape.text
            # Whitespace-only text is skipped; kept text is not trimmed.
            if not shape_text.strip():
                continue
            collected.append(shape_text)
    return "\n".join(collected)