@@ -25,6 +25,57 @@ class CT_LastRenderedPageBreak(BaseOxmlElement):
25
25
distinguished behavior. CT_Empty is used for many elements.
26
26
"""
27
27
28
@property
def following_fragment_p(self) -> CT_P:
    """A "loose" `CT_P` containing only the paragraph content after this break.

    The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
    page-break from which this `w:lastRenderedPageBreak` element and all content
    preceding it have been removed.

    Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
    page-break in its paragraph.

    NOTE: the returned `w:p` can itself contain one or more
    `w:lastRenderedPageBreak` elements (when the paragraph contained more than
    one). While this is rare, the caller should treat the fragment the same as any
    other paragraph and split it again if necessary in a following step or
    recursion.
    """
    # -- the fragment is only well-defined relative to the first break in the
    # -- paragraph; later breaks are handled by re-splitting the returned fragment
    first_lrpb = self._first_lrpb_in_p(self._enclosing_p)
    if self != first_lrpb:
        raise ValueError("only defined on first rendered page-break in paragraph")

    # -- a break inside a hyperlink is split differently than one in a plain run --
    if self._is_in_hyperlink:
        return self._following_frag_in_hlink
    return self._following_frag_in_run
53
+
54
@property
def follows_all_content(self) -> bool:
    """True when this page-break element is the last "content" in the paragraph.

    This is a very uncommon case that may only occur in contrived documents or
    where the XML was edited by hand, but it is not precluded by the spec.
    """
    # -- a page-break inside a hyperlink never meets these criteria (for our
    # -- purposes at least) because it is considered "atomic" and always associated
    # -- with the page it starts on.
    if self._is_in_hyperlink:
        return False

    p = self._enclosing_p
    trailing_lrpb_xpath = (
        # -- in the last run of the paragraph --
        "(./w:r)[last()]"
        # -- all rendered page-breaks --
        "/w:lastRenderedPageBreak"
        # -- that are not followed by any content-bearing sibling elements --
        f"[not(following-sibling::*[{self._run_inner_content_xpath} ])]"
    )
    # -- the XPath result is an empty (falsy) list when no such break exists --
    return bool(p.xpath(trailing_lrpb_xpath))
78
+
28
79
@property
29
80
def precedes_all_content (self ) -> bool :
30
81
"""True when a `w:lastRenderedPageBreak` precedes all paragraph content.
@@ -95,6 +146,65 @@ def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
95
146
raise ValueError ("no rendered page-breaks in paragraph element" )
96
147
return lrpbs [0 ]
97
148
149
@lazyproperty
def _following_frag_in_hlink(self) -> CT_P:
    """Following `CT_P` fragment when this break occurs within a hyperlink.

    The hyperlink containing the break is treated as a unit: it and everything
    before it (other than `w:pPr`) are removed from a deep-copy of the enclosing
    `w:p`, and what remains of that copy is returned.

    Note this is a *partial-function*; it raises `ValueError` when this
    page-break is not inside a hyperlink.
    """
    if not self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break in a hyperlink")

    # -- mutate a copy of the paragraph so the document itself is left untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- `self` lives in the original tree; find its counterpart in the clone --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:hyperlink` that contains the cloned page-break --
    hlink = cloned_lrpb._enclosing_hyperlink(cloned_lrpb)

    # -- drop every sibling ahead of the hyperlink, keeping paragraph properties --
    preceding_siblings = hlink.xpath("./preceding-sibling::*[not(self::w:pPr)]")
    for element in preceding_siblings:
        p_clone.remove(element)

    # -- the hyperlink as a whole belongs to the preceding fragment, so it goes too --
    hlink.getparent().remove(hlink)

    return p_clone
177
+
178
+ @lazyproperty
179
+ def _following_frag_in_run (self ) -> CT_P :
180
+ """following CT_P fragment when break does not occur in a hyperlink.
181
+
182
+ Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink.
183
+ """
184
+ if self ._is_in_hyperlink :
185
+ raise ValueError ("only defined on a rendered page-break not in a hyperlink" )
186
+
187
+ # -- work on a clone `w:p` so our mutations don't persist --
188
+ p = copy .deepcopy (self ._enclosing_p )
189
+
190
+ # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
191
+ lrpb = self ._first_lrpb_in_p (p )
192
+
193
+ # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
194
+ enclosing_r = lrpb .xpath ("./parent::w:r" )[0 ]
195
+
196
+ # -- delete all w:p inner-content preceding that run (but not w:pPr) --
197
+ for e in enclosing_r .xpath ("./preceding-sibling::*[not(self::w:pPr)]" ):
198
+ p .remove (e )
199
+
200
+ # -- then remove all run inner-content preceding this lrpb in its run (but not
201
+ # -- the `w:rPr`) and also remove the page-break itself
202
+ for e in lrpb .xpath ("./preceding-sibling::*[not(self::w:rPr)]" ):
203
+ enclosing_r .remove (e )
204
+ enclosing_r .remove (lrpb )
205
+
206
+ return p
207
+
98
208
@lazyproperty
99
209
def _is_in_hyperlink (self ) -> bool :
100
210
"""True when this page-break is embedded in a hyperlink run."""
0 commit comments