File size: 3,971 Bytes
d015b2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""WordprocessingML Package class and related objects."""

from __future__ import annotations

from typing import IO, cast

from docx.image.image import Image
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.opc.package import OpcPackage
from docx.opc.packuri import PackURI
from docx.parts.image import ImagePart
from docx.shared import lazyproperty


class Package(OpcPackage):
    """OPC package specialized for WordprocessingML documents.

    Adds a package-wide collection of image parts on top of the behaviors inherited
    from |OpcPackage|.
    """

    def after_unmarshal(self):
        """Post-load hook, called by loading code once parts and relationships exist.

        The only post-processing performed here is populating the image-part
        collection from the relationships already present in the package.
        """
        self._gather_image_parts()

    def get_or_add_image_part(self, image_descriptor: str | IO[bytes]) -> ImagePart:
        """Return the |ImagePart| for the image specified by `image_descriptor`.

        Delegates to the package's image-part collection, which creates a new image
        part when no matching one is already present.
        """
        image_parts = self.image_parts
        return image_parts.get_or_add_image_part(image_descriptor)

    @lazyproperty
    def image_parts(self) -> ImageParts:
        """The |ImageParts| collection belonging to this package."""
        return ImageParts()

    def _gather_image_parts(self):
        """Add each image part reachable by relationship to the image-part collection.

        External relationships and relationships of any type other than `RT.IMAGE`
        are ignored. A target part already in the collection is not added again.
        """
        for rel in self.iter_rels():
            # `is_external` is tested first so `reltype` is only consulted for
            # internal relationships, and `target_part` only for internal image ones.
            is_internal_image = not rel.is_external and rel.reltype == RT.IMAGE
            if is_internal_image and rel.target_part not in self.image_parts:
                self.image_parts.append(cast("ImagePart", rel.target_part))


class ImageParts:
    """Collection of |ImagePart| objects corresponding to images in the package.

    Supports membership tests, iteration and `len()`, all delegated to an internal
    list that keeps image parts in the order they were appended.
    """

    def __init__(self):
        self._image_parts: list[ImagePart] = []

    def __contains__(self, item: object) -> bool:
        return item in self._image_parts

    def __iter__(self):
        return iter(self._image_parts)

    def __len__(self) -> int:
        return len(self._image_parts)

    def append(self, item: ImagePart) -> None:
        """Add `item` to the end of this collection."""
        self._image_parts.append(item)

    def get_or_add_image_part(self, image_descriptor: str | IO[bytes]) -> ImagePart:
        """Return |ImagePart| object containing image identified by `image_descriptor`.

        The image is loaded with `Image.from_file()` and matched against existing
        image parts by SHA1 hash. The image-part is newly created, and appended to
        this collection, only if no matching one is present.
        """
        image = Image.from_file(image_descriptor)
        matching_image_part = self._get_by_sha1(image.sha1)
        if matching_image_part is not None:
            return matching_image_part
        return self._add_image_part(image)

    def _add_image_part(self, image: Image) -> ImagePart:
        """Return |ImagePart| newly created from `image` and appended to the collection.

        The new part is given the next available image partname for the extension
        of `image`.
        """
        partname = self._next_image_partname(image.ext)
        image_part = ImagePart.from_image(image, partname)
        self.append(image_part)
        return image_part

    def _get_by_sha1(self, sha1: str) -> ImagePart | None:
        """Return the first image part in this collection whose SHA1 matches `sha1`.

        Returns |None| if no image part has a matching hash.
        """
        for image_part in self._image_parts:
            if image_part.sha1 == sha1:
                return image_part
        return None

    def _next_image_partname(self, ext: str) -> PackURI:
        """The next available image partname, starting from ``/word/media/image1.{ext}``
        where unused numbers are reused.

        The partname is unique by number, without regard to the extension. `ext` does
        not include the leading period.
        """

        def image_partname(n: int) -> PackURI:
            return PackURI(f"/word/media/image{n}.{ext}")

        # A set makes each membership test below constant-time; scanning a list for
        # every candidate number would make this quadratic in the number of images.
        used_numbers = {image_part.partname.idx for image_part in self}
        part_count = len(self)
        for n in range(1, part_count + 1):
            if n not in used_numbers:
                return image_partname(n)
        # Every number in 1..part_count is taken, so the first free one follows them.
        return image_partname(part_count + 1)