Skip to content

Commit cd41998

Browse files
committed
spike: implement 'Bookmarks is a sequence'
1 parent 660492e commit cd41998

File tree

15 files changed

+251
-33
lines changed

15 files changed

+251
-33
lines changed

docx/__init__.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
from docx.opc.parts.coreprops import CorePropertiesPart
1313

1414
from docx.parts.document import DocumentPart
15+
from docx.parts.endnotes import EndnotesPart
16+
from docx.parts.footer import FooterPart
17+
from docx.parts.footnotes import FootnotesPart
18+
from docx.parts.header import HeaderPart
1519
from docx.parts.image import ImagePart
1620
from docx.parts.numbering import NumberingPart
1721
from docx.parts.settings import SettingsPart
@@ -27,11 +31,16 @@ def part_class_selector(content_type, reltype):
2731
PartFactory.part_class_selector = part_class_selector
2832
PartFactory.part_type_for[CT.OPC_CORE_PROPERTIES] = CorePropertiesPart
2933
PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart
34+
PartFactory.part_type_for[CT.WML_ENDNOTES] = EndnotesPart
35+
PartFactory.part_type_for[CT.WML_FOOTER] = FooterPart
36+
PartFactory.part_type_for[CT.WML_FOOTNOTES] = FootnotesPart
37+
PartFactory.part_type_for[CT.WML_HEADER] = HeaderPart
3038
PartFactory.part_type_for[CT.WML_NUMBERING] = NumberingPart
3139
PartFactory.part_type_for[CT.WML_SETTINGS] = SettingsPart
3240
PartFactory.part_type_for[CT.WML_STYLES] = StylesPart
3341

3442
del (
35-
CT, CorePropertiesPart, DocumentPart, NumberingPart, PartFactory,
36-
StylesPart, part_class_selector
43+
CT, CorePropertiesPart, DocumentPart, EndnotesPart, FooterPart,
44+
FootnotesPart, HeaderPart, NumberingPart, PartFactory, StylesPart,
45+
part_class_selector
3746
)

docx/bookmark.py

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,28 @@
66
absolute_import, division, print_function, unicode_literals
77
)
88

9+
# ---the ABC aliases were removed from `collections` in Python 3.10; prefer
# ---`collections.abc` and fall back to the old location for Python 2
try:
    from collections.abc import Sequence
except ImportError:  # pragma: no cover - Python 2 only
    from collections import Sequence
910
from itertools import chain
1011

12+
from docx.oxml.ns import qn
1113
from docx.shared import lazyproperty
1214

1315

14-
class Bookmarks(object):
16+
class Bookmarks(Sequence):
1517
"""Sequence of |Bookmark| objects."""
1618

1719
def __init__(self, document_part):
1820
self._document_part = document_part
1921

22+
def __getitem__(self, idx):
    """Return the bookmark(s) selected by *idx*, an int or a slice.

    An integer *idx* produces a single |_Bookmark| proxy; an out-of-range
    index propagates whatever exception the underlying collection of
    bookmark pairs raises. A slice produces a list of |_Bookmark|
    proxies, one for each selected pair (possibly empty).
    """
    bookmark_pairs = self._finder.bookmark_pairs
    # ---a slice yields a list of pairs; each pair needs its own proxy,
    # ---otherwise the list itself would be unpacked as though it were a
    # ---single (bookmarkStart, bookmarkEnd) pair
    if isinstance(idx, slice):
        return [_Bookmark(pair) for pair in bookmark_pairs[idx]]
    return _Bookmark(bookmark_pairs[idx])
25+
26+
def __iter__(self):
    """Generate a |_Bookmark| proxy for each bookmark pair, in order."""
    # ---an explicit implementation is optional for a Sequence, but the
    # ---inherited default would call __getitem__() once for every item
    pairs = self._finder.bookmark_pairs
    return (_Bookmark(start_end) for start_end in pairs)
30+
2031
def __len__(self):
    """Return the number of bookmark pairs reported by the finder."""
    pairs = self._finder.bookmark_pairs
    return len(pairs)
2233

@@ -26,6 +37,13 @@ def _finder(self):
2637
return _DocumentBookmarkFinder(self._document_part)
2738

2839

40+
class _Bookmark(object):
    """Proxy for a (w:bookmarkStart, w:bookmarkEnd) element pair."""

    def __init__(self, bookmark_pair):
        # ---*bookmark_pair* must unpack into exactly two items---
        start, end = bookmark_pair
        self._bookmarkStart = start
        self._bookmarkEnd = end
45+
46+
2947
class _DocumentBookmarkFinder(object):
3048
"""Provides access to bookmark oxml elements in an overall document."""
3149

@@ -58,7 +76,58 @@ def bookmark_pairs(self):
5876
class _PartBookmarkFinder(object):
    """Provides access to bookmark oxml elements in a story part."""

    def __init__(self, part):
        self._part = part

    @classmethod
    def iter_start_end_pairs(cls, part):
        """Generate each (bookmarkStart, bookmarkEnd) in *part*."""
        finder = cls(part)
        return finder._iter_start_end_pairs()

    def _iter_start_end_pairs(self):
        """Generate each (bookmarkStart, bookmarkEnd) in this part.

        A start element with no matching end element is skipped, as is any
        start element whose name has already been seen in this part.
        """
        for position, start in self._iter_starts():
            end = self._matching_end(start, position)
            # ---the open-pair check comes first so the name of an open
            # ---pair is never recorded as "seen"
            if end is not None and self._add_to_names_so_far(start.name):
                yield (start, end)

    def _iter_starts(self):
        """Generate (idx, bookmarkStart) elements in story.

        The *idx* value indicates the location of the bookmarkStart element
        among all the bookmarkStart and bookmarkEnd elements in the story.
        """
        start_tag = qn('w:bookmarkStart')
        for position, candidate in enumerate(self._all_starts_and_ends):
            if candidate.tag != start_tag:
                continue
            yield position, candidate

    @lazyproperty
    def _all_starts_and_ends(self):
        """Every bookmarkStart and bookmarkEnd element the XPath query finds."""
        expression = '//w:bookmarkStart|//w:bookmarkEnd'
        return self._part.element.xpath(expression)

    def _matching_end(self, bookmarkStart, idx):
        """Return the first bookmarkEnd after *idx* sharing the start's id.

        Returns |None| when no later bookmarkEnd element has the same id as
        *bookmarkStart*.
        """
        start_tag = qn('w:bookmarkStart')
        for candidate in self._all_starts_and_ends[idx + 1:]:
            # ---only bookmarkEnd elements can close the bookmark---
            is_end = candidate.tag != start_tag
            if is_end and candidate.id == bookmarkStart.id:
                return candidate
        return None

    def _add_to_names_so_far(self, name):
        """Return True if name was added, False if name already present."""
        seen = self._names_so_far
        is_new = name not in seen
        if is_new:
            seen.add(name)
        return is_new

    @lazyproperty
    def _names_so_far(self):
        """Set of bookmark names recorded so far by this finder."""
        return set()

docx/opc/part.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,17 @@ def drop_rel(self, rId):
7474
if self._rel_ref_count(rId) < 2:
7575
del self.rels[rId]
7676

77+
def iter_parts_related_by(self, reltypes):
    """Generate the target part of each relationship matching *reltypes*.

    *reltypes* only needs to support the `in` operator; a `set` is
    convenient, but a list or other sequence type works as well.
    """
    relationships = self.rels.values()
    return (
        relationship.target_part
        for relationship in relationships
        if relationship.reltype in reltypes
    )
87+
7788
@classmethod
7889
def load(cls, partname, content_type, blob, package):
7990
return cls(partname, content_type, blob, package)

docx/opc/rel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
class Relationships(dict):
1515
"""
16-
Collection object for |_Relationship| instances, having list semantics.
16+
Collection object for |_Relationship| instances, having dict semantics.
1717
"""
1818
def __init__(self, baseURI):
1919
super(Relationships, self).__init__()

docx/oxml/__init__.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,27 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
6464
# custom element class mappings
6565
# ===========================================================================
6666

67-
from .shared import CT_DecimalNumber, CT_OnOff, CT_String
67+
from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String
6868

6969

70-
from .coreprops import CT_CoreProperties
70+
from docx.oxml.bookmark import CT_Bookmark, CT_MarkupRange
71+
register_element_cls('w:bookmarkEnd', CT_MarkupRange)
72+
register_element_cls('w:bookmarkStart', CT_Bookmark)
73+
74+
from docx.oxml.coreprops import CT_CoreProperties
7175
register_element_cls('cp:coreProperties', CT_CoreProperties)
7276

73-
from .document import CT_Body, CT_Document
77+
from docx.oxml.document import CT_Body, CT_Document
7478
register_element_cls('w:body', CT_Body)
7579
register_element_cls('w:document', CT_Document)
7680

77-
from .numbering import (
81+
from docx.oxml.endnotes import CT_Endnotes
82+
register_element_cls('w:endnotes', CT_Endnotes)
83+
84+
from docx.oxml.footnotes import CT_Footnotes
85+
register_element_cls('w:footnotes', CT_Footnotes)
86+
87+
from docx.oxml.numbering import (
7888
CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr
7989
)
8090
register_element_cls('w:abstractNumId', CT_DecimalNumber)
@@ -86,13 +96,13 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
8696
register_element_cls('w:numbering', CT_Numbering)
8797
register_element_cls('w:startOverride', CT_DecimalNumber)
8898

89-
from .section import CT_PageMar, CT_PageSz, CT_SectPr, CT_SectType
99+
from docx.oxml.section import CT_PageMar, CT_PageSz, CT_SectPr, CT_SectType
90100
register_element_cls('w:pgMar', CT_PageMar)
91101
register_element_cls('w:pgSz', CT_PageSz)
92102
register_element_cls('w:sectPr', CT_SectPr)
93103
register_element_cls('w:type', CT_SectType)
94104

95-
from .shape import (
105+
from docx.oxml.shape import (
96106
CT_Blip, CT_BlipFillProperties, CT_GraphicalObject,
97107
CT_GraphicalObjectData, CT_Inline, CT_NonVisualDrawingProps, CT_Picture,
98108
CT_PictureNonVisual, CT_Point2D, CT_PositiveSize2D, CT_ShapeProperties,
@@ -113,7 +123,9 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
113123
register_element_cls('wp:extent', CT_PositiveSize2D)
114124
register_element_cls('wp:inline', CT_Inline)
115125

116-
from .styles import CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles
126+
from docx.oxml.styles import (
127+
CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles
128+
)
117129
register_element_cls('w:basedOn', CT_String)
118130
register_element_cls('w:latentStyles', CT_LatentStyles)
119131
register_element_cls('w:locked', CT_OnOff)
@@ -127,7 +139,7 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
127139
register_element_cls('w:uiPriority', CT_DecimalNumber)
128140
register_element_cls('w:unhideWhenUsed', CT_OnOff)
129141

130-
from .table import (
142+
from docx.oxml.table import (
131143
CT_Height, CT_Row, CT_Tbl, CT_TblGrid, CT_TblGridCol, CT_TblLayoutType,
132144
CT_TblPr, CT_TblWidth, CT_Tc, CT_TcPr, CT_TrPr, CT_VerticalJc, CT_VMerge
133145
)
@@ -148,7 +160,7 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
148160
register_element_cls('w:vAlign', CT_VerticalJc)
149161
register_element_cls('w:vMerge', CT_VMerge)
150162

151-
from .text.font import (
163+
from docx.oxml.text.font import (
152164
CT_Color, CT_Fonts, CT_Highlight, CT_HpsMeasure, CT_RPr, CT_Underline,
153165
CT_VerticalAlignRun
154166
)
@@ -181,10 +193,10 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
181193
register_element_cls('w:vertAlign', CT_VerticalAlignRun)
182194
register_element_cls('w:webHidden', CT_OnOff)
183195

184-
from .text.paragraph import CT_P
196+
from docx.oxml.text.paragraph import CT_P
185197
register_element_cls('w:p', CT_P)
186198

187-
from .text.parfmt import (
199+
from docx.oxml.text.parfmt import (
188200
CT_Ind, CT_Jc, CT_PPr, CT_Spacing, CT_TabStop, CT_TabStops
189201
)
190202
register_element_cls('w:ind', CT_Ind)
@@ -199,7 +211,11 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
199211
register_element_cls('w:tabs', CT_TabStops)
200212
register_element_cls('w:widowControl', CT_OnOff)
201213

202-
from .text.run import CT_Br, CT_R, CT_Text
214+
from docx.oxml.text.run import CT_Br, CT_R, CT_Text
203215
register_element_cls('w:br', CT_Br)
204216
register_element_cls('w:r', CT_R)
205217
register_element_cls('w:t', CT_Text)
218+
219+
from docx.oxml.header import CT_HdrFtr
220+
register_element_cls('w:hdr', CT_HdrFtr)
221+
register_element_cls('w:ftr', CT_HdrFtr)

docx/oxml/bookmark.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# encoding: utf-8
2+
3+
"""Custom element classes related to bookmarks."""
4+
5+
from __future__ import (
6+
absolute_import, division, print_function, unicode_literals
7+
)
8+
9+
from docx.oxml.simpletypes import ST_DecimalNumber, ST_String
10+
from docx.oxml.xmlchemy import BaseOxmlElement, RequiredAttribute
11+
12+
13+
class CT_Bookmark(BaseOxmlElement):
14+
"""w:bookmarkStart element"""
15+
id = RequiredAttribute('w:id', ST_DecimalNumber)
16+
name = RequiredAttribute('w:name', ST_String)
17+
18+
19+
class CT_MarkupRange(BaseOxmlElement):
20+
"""w:bookmarkEnd element"""
21+
id = RequiredAttribute('w:id', ST_DecimalNumber)

docx/oxml/endnotes.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# encoding: utf-8
2+
3+
"""Custom element classes related to end-notes."""
4+
5+
from __future__ import (
6+
absolute_import, division, print_function, unicode_literals
7+
)
8+
9+
from docx.oxml.xmlchemy import BaseOxmlElement
10+
11+
12+
class CT_Endnotes(BaseOxmlElement):
13+
"""w:endnotes element"""

docx/oxml/footnotes.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# encoding: utf-8
2+
3+
"""Custom element classes related to footnotes."""
4+
5+
from __future__ import (
6+
absolute_import, division, print_function, unicode_literals
7+
)
8+
9+
from docx.oxml.xmlchemy import BaseOxmlElement
10+
11+
12+
class CT_Footnotes(BaseOxmlElement):
13+
"""w:footnotes element"""

docx/oxml/header.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# encoding: utf-8
2+
3+
"""Custom element classes related to headers and footers."""
4+
5+
from .xmlchemy import BaseOxmlElement
6+
7+
8+
class CT_HdrFtr(BaseOxmlElement):
9+
"""Used for w:hdr and w:ftr."""

docx/parts/document.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,34 @@
11
# encoding: utf-8
22

3-
"""
4-
|DocumentPart| and closely related objects
5-
"""
3+
"""|DocumentPart| and closely related objects."""
64

75
from __future__ import (
86
absolute_import, division, print_function, unicode_literals
97
)
108

11-
from ..document import Document
12-
from .numbering import NumberingPart
13-
from ..opc.constants import RELATIONSHIP_TYPE as RT
14-
from ..opc.part import XmlPart
15-
from ..oxml.shape import CT_Inline
16-
from ..shape import InlineShapes
17-
from ..shared import lazyproperty
18-
from .settings import SettingsPart
19-
from .styles import StylesPart
9+
from itertools import chain
10+
11+
from docx.document import Document
12+
from docx.opc.constants import RELATIONSHIP_TYPE as RT
13+
from docx.opc.part import XmlPart
14+
from docx.oxml.shape import CT_Inline
15+
from docx.parts.numbering import NumberingPart
16+
from docx.parts.settings import SettingsPart
17+
from docx.parts.styles import StylesPart
18+
from docx.shape import InlineShapes
19+
from docx.shared import lazyproperty
2020

2121

2222
class DocumentPart(XmlPart):
23-
"""
24-
Main document part of a WordprocessingML (WML) package, aka a .docx file.
23+
"""Main document part of a WordprocessingML (WML) package.
24+
2525
Acts as broker to other parts such as image, core properties, and style
2626
parts. It also acts as a convenient delegate when a mid-document object
2727
needs a service involving a remote ancestor. The `Parented.part` property
2828
inherited by many content objects provides access to this part object for
2929
that purpose.
3030
"""
31+
3132
@property
3233
def core_properties(self):
3334
"""
@@ -90,7 +91,12 @@ def iter_story_parts(self):
9091
Story parts include this main document part, headers, footers,
9192
footnotes, and endnotes.
9293
"""
93-
raise NotImplementedError
94+
return chain(
95+
(self,),
96+
self.iter_parts_related_by(
97+
{RT.HEADER, RT.FOOTER, RT.FOOTNOTES, RT.ENDNOTES}
98+
)
99+
)
94100

95101
def new_pic_inline(self, image_descriptor, width, height):
96102
"""

0 commit comments

Comments
 (0)