OLD | NEW |
(Empty) | |
| 1 # |
| 2 # ElementTree |
| 3 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ |
| 4 # |
| 5 # limited xpath support for element trees |
| 6 # |
| 7 # history: |
| 8 # 2003-05-23 fl created |
| 9 # 2003-05-28 fl added support for // etc |
| 10 # 2003-08-27 fl fixed parsing of periods in element names |
| 11 # 2007-09-10 fl new selection engine |
| 12 # 2007-09-12 fl fixed parent selector |
| 13 # 2007-09-13 fl added iterfind; changed findall to return a list |
| 14 # 2007-11-30 fl added namespaces support |
| 15 # 2009-10-30 fl added child element value filter |
| 16 # |
| 17 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. |
| 18 # |
| 19 # fredrik@pythonware.com |
| 20 # http://www.pythonware.com |
| 21 # |
| 22 # -------------------------------------------------------------------- |
| 23 # The ElementTree toolkit is |
| 24 # |
| 25 # Copyright (c) 1999-2009 by Fredrik Lundh |
| 26 # |
| 27 # By obtaining, using, and/or copying this software and/or its |
| 28 # associated documentation, you agree that you have read, understood, |
| 29 # and will comply with the following terms and conditions: |
| 30 # |
| 31 # Permission to use, copy, modify, and distribute this software and |
| 32 # its associated documentation for any purpose and without fee is |
| 33 # hereby granted, provided that the above copyright notice appears in |
| 34 # all copies, and that both that copyright notice and this permission |
| 35 # notice appear in supporting documentation, and that the name of |
| 36 # Secret Labs AB or the author not be used in advertising or publicity |
| 37 # pertaining to distribution of the software without specific, written |
| 38 # prior permission. |
| 39 # |
| 40 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD |
| 41 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- |
| 42 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR |
| 43 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY |
| 44 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
| 45 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
| 46 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
| 47 # OF THIS SOFTWARE. |
| 48 # -------------------------------------------------------------------- |
| 49 |
| 50 # Licensed to PSF under a Contributor Agreement. |
| 51 # See http://www.python.org/psf/license for licensing details. |
| 52 |
| 53 ## |
| 54 # Implementation module for XPath support. There's usually no reason |
| 55 # to import this module directly; the <b>ElementTree</b> does this for |
| 56 # you, if needed. |
| 57 ## |
| 58 |
| 59 import re |
| 60 |
| 61 xpath_tokenizer_re = re.compile( |
| 62 "(" |
| 63 "'[^']*'|\"[^\"]*\"|" |
| 64 "::|" |
| 65 "//?|" |
| 66 "\.\.|" |
| 67 "\(\)|" |
| 68 "[/.*:\[\]\(\)@=])|" |
| 69 "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" |
| 70 "\s+" |
| 71 ) |
| 72 |
| 73 def xpath_tokenizer(pattern, namespaces=None): |
| 74 for token in xpath_tokenizer_re.findall(pattern): |
| 75 tag = token[1] |
| 76 if tag and tag[0] != "{" and ":" in tag: |
| 77 try: |
| 78 prefix, uri = tag.split(":", 1) |
| 79 if not namespaces: |
| 80 raise KeyError |
| 81 yield token[0], "{%s}%s" % (namespaces[prefix], uri) |
| 82 except KeyError: |
| 83 raise SyntaxError("prefix %r not found in prefix map" % prefix) |
| 84 else: |
| 85 yield token |
| 86 |
| 87 def get_parent_map(context): |
| 88 parent_map = context.parent_map |
| 89 if parent_map is None: |
| 90 context.parent_map = parent_map = {} |
| 91 for p in context.root.iter(): |
| 92 for e in p: |
| 93 parent_map[e] = p |
| 94 return parent_map |
| 95 |
| 96 def prepare_child(next, token): |
| 97 tag = token[1] |
| 98 def select(context, result): |
| 99 for elem in result: |
| 100 for e in elem: |
| 101 if e.tag == tag: |
| 102 yield e |
| 103 return select |
| 104 |
| 105 def prepare_star(next, token): |
| 106 def select(context, result): |
| 107 for elem in result: |
| 108 for e in elem: |
| 109 yield e |
| 110 return select |
| 111 |
| 112 def prepare_self(next, token): |
| 113 def select(context, result): |
| 114 for elem in result: |
| 115 yield elem |
| 116 return select |
| 117 |
| 118 def prepare_descendant(next, token): |
| 119 token = next() |
| 120 if token[0] == "*": |
| 121 tag = "*" |
| 122 elif not token[0]: |
| 123 tag = token[1] |
| 124 else: |
| 125 raise SyntaxError("invalid descendant") |
| 126 def select(context, result): |
| 127 for elem in result: |
| 128 for e in elem.iter(tag): |
| 129 if e is not elem: |
| 130 yield e |
| 131 return select |
| 132 |
| 133 def prepare_parent(next, token): |
| 134 def select(context, result): |
| 135 # FIXME: raise error if .. is applied at toplevel? |
| 136 parent_map = get_parent_map(context) |
| 137 result_map = {} |
| 138 for elem in result: |
| 139 if elem in parent_map: |
| 140 parent = parent_map[elem] |
| 141 if parent not in result_map: |
| 142 result_map[parent] = None |
| 143 yield parent |
| 144 return select |
| 145 |
| 146 def prepare_predicate(next, token): |
| 147 # FIXME: replace with real parser!!! refs: |
| 148 # http://effbot.org/zone/simple-iterator-parser.htm |
| 149 # http://javascript.crockford.com/tdop/tdop.html |
| 150 signature = [] |
| 151 predicate = [] |
| 152 while 1: |
| 153 token = next() |
| 154 if token[0] == "]": |
| 155 break |
| 156 if token[0] and token[0][:1] in "'\"": |
| 157 token = "'", token[0][1:-1] |
| 158 signature.append(token[0] or "-") |
| 159 predicate.append(token[1]) |
| 160 signature = "".join(signature) |
| 161 # use signature to determine predicate type |
| 162 if signature == "@-": |
| 163 # [@attribute] predicate |
| 164 key = predicate[1] |
| 165 def select(context, result): |
| 166 for elem in result: |
| 167 if elem.get(key) is not None: |
| 168 yield elem |
| 169 return select |
| 170 if signature == "@-='": |
| 171 # [@attribute='value'] |
| 172 key = predicate[1] |
| 173 value = predicate[-1] |
| 174 def select(context, result): |
| 175 for elem in result: |
| 176 if elem.get(key) == value: |
| 177 yield elem |
| 178 return select |
| 179 if signature == "-" and not re.match("\d+$", predicate[0]): |
| 180 # [tag] |
| 181 tag = predicate[0] |
| 182 def select(context, result): |
| 183 for elem in result: |
| 184 if elem.find(tag) is not None: |
| 185 yield elem |
| 186 return select |
| 187 if signature == "-='" and not re.match("\d+$", predicate[0]): |
| 188 # [tag='value'] |
| 189 tag = predicate[0] |
| 190 value = predicate[-1] |
| 191 def select(context, result): |
| 192 for elem in result: |
| 193 for e in elem.findall(tag): |
| 194 if "".join(e.itertext()) == value: |
| 195 yield elem |
| 196 break |
| 197 return select |
| 198 if signature == "-" or signature == "-()" or signature == "-()-": |
| 199 # [index] or [last()] or [last()-index] |
| 200 if signature == "-": |
| 201 index = int(predicate[0]) - 1 |
| 202 else: |
| 203 if predicate[0] != "last": |
| 204 raise SyntaxError("unsupported function") |
| 205 if signature == "-()-": |
| 206 try: |
| 207 index = int(predicate[2]) - 1 |
| 208 except ValueError: |
| 209 raise SyntaxError("unsupported expression") |
| 210 else: |
| 211 index = -1 |
| 212 def select(context, result): |
| 213 parent_map = get_parent_map(context) |
| 214 for elem in result: |
| 215 try: |
| 216 parent = parent_map[elem] |
| 217 # FIXME: what if the selector is "*" ? |
| 218 elems = list(parent.findall(elem.tag)) |
| 219 if elems[index] is elem: |
| 220 yield elem |
| 221 except (IndexError, KeyError): |
| 222 pass |
| 223 return select |
| 224 raise SyntaxError("invalid predicate") |
| 225 |
| 226 ops = { |
| 227 "": prepare_child, |
| 228 "*": prepare_star, |
| 229 ".": prepare_self, |
| 230 "..": prepare_parent, |
| 231 "//": prepare_descendant, |
| 232 "[": prepare_predicate, |
| 233 } |
| 234 |
| 235 _cache = {} |
| 236 |
| 237 class _SelectorContext: |
| 238 parent_map = None |
| 239 def __init__(self, root): |
| 240 self.root = root |
| 241 |
| 242 # -------------------------------------------------------------------- |
| 243 |
| 244 ## |
| 245 # Generate all matching objects. |
| 246 |
| 247 def iterfind(elem, path, namespaces=None): |
| 248 # compile selector pattern |
| 249 if path[-1:] == "/": |
| 250 path = path + "*" # implicit all (FIXME: keep this?) |
| 251 try: |
| 252 selector = _cache[path] |
| 253 except KeyError: |
| 254 if len(_cache) > 100: |
| 255 _cache.clear() |
| 256 if path[:1] == "/": |
| 257 raise SyntaxError("cannot use absolute path on element") |
| 258 next = iter(xpath_tokenizer(path, namespaces)).next |
| 259 token = next() |
| 260 selector = [] |
| 261 while 1: |
| 262 try: |
| 263 selector.append(ops[token[0]](next, token)) |
| 264 except StopIteration: |
| 265 raise SyntaxError("invalid path") |
| 266 try: |
| 267 token = next() |
| 268 if token[0] == "/": |
| 269 token = next() |
| 270 except StopIteration: |
| 271 break |
| 272 _cache[path] = selector |
| 273 # execute selector pattern |
| 274 result = [elem] |
| 275 context = _SelectorContext(elem) |
| 276 for select in selector: |
| 277 result = select(context, result) |
| 278 return result |
| 279 |
| 280 ## |
| 281 # Find first matching object. |
| 282 |
| 283 def find(elem, path, namespaces=None): |
| 284 try: |
| 285 return iterfind(elem, path, namespaces).next() |
| 286 except StopIteration: |
| 287 return None |
| 288 |
| 289 ## |
| 290 # Find all matching objects. |
| 291 |
| 292 def findall(elem, path, namespaces=None): |
| 293 return list(iterfind(elem, path, namespaces)) |
| 294 |
| 295 ## |
| 296 # Find text for first matching object. |
| 297 |
| 298 def findtext(elem, path, default=None, namespaces=None): |
| 299 try: |
| 300 elem = iterfind(elem, path, namespaces).next() |
| 301 return elem.text or "" |
| 302 except StopIteration: |
| 303 return default |
OLD | NEW |