2
2
3
3
from __future__ import annotations
4
4
5
+ import copy
6
+ from typing import TYPE_CHECKING
7
+
5
8
from docx .oxml .xmlchemy import BaseOxmlElement
9
+ from docx .shared import lazyproperty
10
+
11
+ if TYPE_CHECKING :
12
+ from docx .oxml .text .hyperlink import CT_Hyperlink
13
+ from docx .oxml .text .paragraph import CT_P
6
14
7
15
8
16
class CT_LastRenderedPageBreak (BaseOxmlElement ):
@@ -16,3 +24,151 @@ class CT_LastRenderedPageBreak(BaseOxmlElement):
16
24
`w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
17
25
distinguished behavior. CT_Empty is used for many elements.
18
26
"""
27
+
28
@property
def precedes_all_content(self) -> bool:
    """True when a `w:lastRenderedPageBreak` precedes all paragraph content.

    This is a common case; it arises whenever the page happens to break on an
    even paragraph boundary.
    """
    # -- a break inside a hyperlink never qualifies because part of the
    # -- hyperlink text always comes before the page-break.
    if self._is_in_hyperlink:
        return False

    content_xpath = self._run_inner_content_xpath
    leading_lrpb_xpath = (
        # -- within the first run of the paragraph --
        "./w:r[1]"
        # -- consider every rendered page-break --
        "/w:lastRenderedPageBreak"
        # -- keeping only those with no content-bearing preceding sibling --
        f"[not(preceding-sibling::*[{content_xpath} ])]"
    )
    matches = self._enclosing_p.xpath(leading_lrpb_xpath)
    return bool(matches)
51
+
52
@property
def preceding_fragment_p(self) -> CT_P:
    """A "loose" `CT_P` containing only the paragraph content before this break.

    The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
    page-break, with this `w:lastRenderedPageBreak` element and the content that
    follows it removed. When the break lies inside a hyperlink, the hyperlink is
    not split; all of it stays in the returned fragment.

    Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
    page-break in its paragraph.
    """
    if self != self._first_lrpb_in_p(self._enclosing_p):
        raise ValueError("only defined on first rendered page-break in paragraph")

    # -- splitting approach is different when break is inside a hyperlink --
    if self._is_in_hyperlink:
        return self._preceding_frag_in_hlink
    return self._preceding_frag_in_run
72
+
73
def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
    """The `w:hyperlink` grandparent of the `lrpb` page-break.

    The lookup is performed on the `lrpb` argument rather than on `self`.

    Raises `IndexError` when the run containing `lrpb` is not the child of a
    `w:hyperlink`, so only call this for a page-break known to be inside a
    hyperlink.
    """
    hyperlinks = lrpb.xpath("./parent::w:r/parent::w:hyperlink")
    return hyperlinks[0]
80
+
81
@property
def _enclosing_p(self) -> CT_P:
    """The nearest `w:p` ancestor of this `w:lastRenderedPageBreak`.

    Indexing the XPath result raises `IndexError` when no `w:p` ancestor exists.
    """
    nearest_p = self.xpath("./ancestor::w:p[1]")
    return nearest_p[0]
85
+
86
def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
    """The first `w:lastRenderedPageBreak` element found in `p`.

    Page-breaks are looked for both in runs that are direct children of `p` and
    in runs nested one level down inside a `w:hyperlink`.

    Raises `ValueError` when `p` contains no rendered page-break.
    """
    found = p.xpath(
        "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
    )
    if found:
        return found[0]
    raise ValueError("no rendered page-breaks in paragraph element")
97
+
98
@lazyproperty
def _is_in_hyperlink(self) -> bool:
    """True when the run holding this page-break is the child of a hyperlink."""
    hyperlink_grandparents = self.xpath("./parent::w:r/parent::w:hyperlink")
    return bool(hyperlink_grandparents)
102
+
103
@lazyproperty
def _preceding_frag_in_hlink(self) -> CT_P:
    """Preceding `CT_P` fragment for a page-break located inside a hyperlink.

    This is a *partial-function*; it raises `ValueError` when this page-break is
    not inside a hyperlink.
    """
    if not self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break in a hyperlink")

    # -- mutate a deep copy so the original `w:p` is left untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- first rendered page-break of the clone; this is the stand-in for `self`
    # -- inside the copied paragraph
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:hyperlink` that contains the cloned page-break --
    hyperlink = cloned_lrpb._enclosing_hyperlink(cloned_lrpb)

    # -- drop every paragraph child that comes after the hyperlink --
    trailing_elements = hyperlink.xpath("./following-sibling::*")
    for element in trailing_elements:
        p_clone.remove(element)

    # -- take the page-break itself out of its run --
    run = cloned_lrpb.getparent()
    run.remove(cloned_lrpb)

    # -- the hyperlink is deliberately not "split"; all of it stays in the
    # -- preceding fragment.
    return p_clone
132
+
133
@lazyproperty
def _preceding_frag_in_run(self) -> CT_P:
    """Preceding `CT_P` fragment for a page-break that is not inside a hyperlink.

    This is a *partial-function*; it raises `ValueError` when this page-break is
    inside a hyperlink.
    """
    if self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break not in a hyperlink")

    # -- mutate a deep copy so the original `w:p` is left untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- first rendered page-break of the clone; this is the stand-in for `self`
    # -- inside the copied paragraph
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:r` that directly contains the cloned page-break --
    run = cloned_lrpb.xpath("./parent::w:r")[0]

    # -- drop every paragraph child that comes after that run --
    elements_after_run = run.xpath("./following-sibling::*")
    for element in elements_after_run:
        p_clone.remove(element)

    # -- within the run, drop whatever follows the page-break, and then the
    # -- page-break itself
    elements_after_break = cloned_lrpb.xpath("./following-sibling::*")
    for element in elements_after_break:
        run.remove(element)
    run.remove(cloned_lrpb)

    return p_clone
162
+
163
@lazyproperty
def _run_inner_content_xpath(self) -> str:
    """XPath fragment matching any run inner-content element.

    The fragment is a union of `self::` tests, one per content-bearing element
    tag, intended for use inside an XPath predicate.
    """
    content_tags = (
        "w:br",
        "w:cr",
        "w:drawing",
        "w:noBreakHyphen",
        "w:ptab",
        "w:t",
        "w:tab",
    )
    return " | ".join(f"self::{tag}" for tag in content_tags)
0 commit comments