|
"""WordprocessingML Package class and related objects.""" |
|
|
|
from __future__ import annotations |
|
|
|
from typing import IO, cast |
|
|
|
from docx.image.image import Image |
|
from docx.opc.constants import RELATIONSHIP_TYPE as RT |
|
from docx.opc.package import OpcPackage |
|
from docx.opc.packuri import PackURI |
|
from docx.parts.image import ImagePart |
|
from docx.shared import lazyproperty |
|
|
|
|
|
class Package(OpcPackage):
    """OPC package specialized for WordprocessingML documents.

    Adds a package-wide |ImageParts| collection on top of the base package behavior.
    """

    def after_unmarshal(self):
        """Post-processing hook called by loading code once the package is loaded.

        Runs after all parts and relationships have been loaded; here it fills the
        image-part collection from the package relationships.
        """
        self._gather_image_parts()

    def get_or_add_image_part(self, image_descriptor: str | IO[bytes]) -> ImagePart:
        """Return the |ImagePart| for the image specified by `image_descriptor`.

        Delegates to the package's |ImageParts| collection, which creates a new
        image-part when no matching one is already present.
        """
        image_parts = self.image_parts
        return image_parts.get_or_add_image_part(image_descriptor)

    @lazyproperty
    def image_parts(self) -> ImageParts:
        """The |ImageParts| collection object belonging to this package."""
        return ImageParts()

    def _gather_image_parts(self):
        """Add each image part in the package to the image-part collection, once."""
        for rel in self.iter_rels():
            # -- only internal image relationships are of interest; the external
            # -- check comes first so `target_part` is never read on an external rel
            if rel.is_external or rel.reltype != RT.IMAGE:
                continue
            image_part = rel.target_part
            if image_part not in self.image_parts:
                self.image_parts.append(cast("ImagePart", image_part))
|
|
|
|
|
class ImageParts:
    """Collection of |ImagePart| objects corresponding to images in the package.

    Backed by a plain list; membership tests, iteration and `len()` are delegated
    to that list.
    """

    def __init__(self) -> None:
        self._image_parts: list[ImagePart] = []

    def __contains__(self, item: object) -> bool:
        return item in self._image_parts

    def __iter__(self):
        return iter(self._image_parts)

    def __len__(self) -> int:
        return len(self._image_parts)

    def append(self, item: ImagePart) -> None:
        """Add `item` to the end of this collection."""
        self._image_parts.append(item)

    def get_or_add_image_part(self, image_descriptor: str | IO[bytes]) -> ImagePart:
        """Return |ImagePart| object containing image identified by `image_descriptor`.

        The image is loaded with `Image.from_file()` and looked up by its SHA1 hash.
        The image-part is newly created, and appended to this collection, only when
        no part with a matching hash is already present.
        """
        image = Image.from_file(image_descriptor)
        matching_image_part = self._get_by_sha1(image.sha1)
        if matching_image_part is not None:
            return matching_image_part
        return self._add_image_part(image)

    def _add_image_part(self, image: Image) -> ImagePart:
        """Return |ImagePart| instance newly created from `image` and appended to the collection."""
        partname = self._next_image_partname(image.ext)
        image_part = ImagePart.from_image(image, partname)
        self.append(image_part)
        return image_part

    def _get_by_sha1(self, sha1: str) -> ImagePart | None:
        """Return the first image part in this collection whose SHA1 hash equals `sha1`.

        Returns |None| when no image part matches.
        """
        return next(
            (image_part for image_part in self._image_parts if image_part.sha1 == sha1),
            None,
        )

    def _next_image_partname(self, ext: str) -> PackURI:
        """The next available image partname, starting from ``/word/media/image1.{ext}``
        where unused numbers are reused.

        The partname is unique by number, without regard to the extension. `ext` does
        not include the leading period.
        """
        # -- a set keeps each membership test O(1); a list made this scan quadratic --
        used_numbers = {image_part.partname.idx for image_part in self}
        # -- at most `len(self)` numbers are taken, so the range 1..len(self)+1 always
        # -- contains a free one and `next()` cannot run dry
        n = next(n for n in range(1, len(self) + 2) if n not in used_numbers)
        return PackURI("/word/media/image%d.%s" % (n, ext))
|
|